[RFC PATCH 01/10] dma-buf: add support for paged attachment mappings

Christian König christian.koenig at amd.com
Tue Jul 11 07:59:00 UTC 2023


Am 11.07.23 um 00:32 schrieb Mina Almasry:
> Currently dmabuf p2p memory doesn't present itself in the form of struct
> pages and the memory can't be easily used with code that expects memory
> in that form.

Well, this won't fly at all.

First of all DMA-buf is *not* about passing struct pages around, drivers 
are *only* supposed to use the DMA addresses.

That code can't use the pages inside a DMA-buf is absolutely 
intentional. We even mangle the page pointers from the sg_tables because 
people sometimes get the impression they can use those.

See function mangle_sg_table() in dma-buf.c

         /* To catch abuse of the underlying struct page by importers mix
          * up the bits, but take care to preserve the low SG_ bits to
          * not corrupt the sgt. The mixing is undone in __unmap_dma_buf
          * before passing the sgt back to the exporter. */
         for_each_sgtable_sg(sg_table, sg, i)
                 sg->page_link ^= ~0xffUL;

> Add support for paged attachment mappings. We use existing
> dmabuf APIs to create a mapped attachment (dma_buf_attach() &
> dma_buf_map_attachment()), and we create struct pages for this mapping.
> We write the dma_addr's from the sg_table into the created pages.

Hui, what? Not really.

Regards,
Christian.


>   These
> pages can then be passed into code that expects struct pages and can
> largely operate on these pages with minimal modifications:
>
> 1. The pages need not be dma mapped. The dma addr can be queried from
>     page->zone_device_data and used directly.
> 2. The pages are not kmappable.
>
> Add a new ioctl that enables the user to create a struct page backed
> dmabuf attachment mapping. This ioctl returns a new file descriptor
> which represents the dmabuf pages. The pages are freed when (a) the
> user closes the file, and (b) the struct pages backing the dmabuf are
> no longer in use. Once the pages are no longer in use, the mapped
> attachment is removed.
>
> The support added in this patch should be generic - the pages are created
> by the base code, but the user specifies the type of page to create using
> the dmabuf_create_pages_info->type flag. The base code hands of any
> handling specific to the use case of the ops of that page type.
>
> Signed-off-by: Mina Almasry <almasrymina at google.com>
> ---
>   drivers/dma-buf/dma-buf.c    | 223 +++++++++++++++++++++++++++++++++++
>   include/linux/dma-buf.h      |  90 ++++++++++++++
>   include/uapi/linux/dma-buf.h |   9 ++
>   3 files changed, 322 insertions(+)
>
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index aa4ea8530cb3..50b1d813cf5c 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -22,6 +22,7 @@
>   #include <linux/module.h>
>   #include <linux/seq_file.h>
>   #include <linux/sync_file.h>
> +#include <linux/pci.h>
>   #include <linux/poll.h>
>   #include <linux/dma-resv.h>
>   #include <linux/mm.h>
> @@ -442,12 +443,16 @@ static long dma_buf_import_sync_file(struct dma_buf *dmabuf,
>   }
>   #endif
>   
> +static long dma_buf_create_pages(struct file *file,
> +				 struct dma_buf_create_pages_info *create_info);
> +
>   static long dma_buf_ioctl(struct file *file,
>   			  unsigned int cmd, unsigned long arg)
>   {
>   	struct dma_buf *dmabuf;
>   	struct dma_buf_sync sync;
>   	enum dma_data_direction direction;
> +	struct dma_buf_create_pages_info create_info;
>   	int ret;
>   
>   	dmabuf = file->private_data;
> @@ -484,6 +489,12 @@ static long dma_buf_ioctl(struct file *file,
>   	case DMA_BUF_SET_NAME_A:
>   	case DMA_BUF_SET_NAME_B:
>   		return dma_buf_set_name(dmabuf, (const char __user *)arg);
> +	case DMA_BUF_CREATE_PAGES:
> +		if (copy_from_user(&create_info, (void __user *)arg,
> +				   sizeof(create_info)))
> +			return -EFAULT;
> +
> +		return dma_buf_create_pages(file, &create_info);
>   
>   #if IS_ENABLED(CONFIG_SYNC_FILE)
>   	case DMA_BUF_IOCTL_EXPORT_SYNC_FILE:
> @@ -1613,6 +1624,218 @@ void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map)
>   }
>   EXPORT_SYMBOL_NS_GPL(dma_buf_vunmap_unlocked, DMA_BUF);
>   
> +static int dma_buf_pages_release(struct inode *inode, struct file *file)
> +{
> +	struct dma_buf_pages *priv = file->private_data;
> +
> +	if (priv->type_ops->dma_buf_pages_release)
> +		priv->type_ops->dma_buf_pages_release(priv, file);
> +
> +	percpu_ref_kill(&priv->pgmap.ref);
> +	/* Drop initial ref after percpu_ref_kill(). */
> +	percpu_ref_put(&priv->pgmap.ref);
> +
> +	return 0;
> +}
> +
> +static void dma_buf_page_free(struct page *page)
> +{
> +	struct dma_buf_pages *priv;
> +	struct dev_pagemap *pgmap;
> +
> +	pgmap = page->pgmap;
> +	priv = container_of(pgmap, struct dma_buf_pages, pgmap);
> +
> +	if (priv->type_ops->dma_buf_page_free)
> +		priv->type_ops->dma_buf_page_free(priv, page);
> +}
> +
> +const struct dev_pagemap_ops dma_buf_pgmap_ops = {
> +	.page_free	= dma_buf_page_free,
> +};
> +EXPORT_SYMBOL_GPL(dma_buf_pgmap_ops);
> +
> +const struct file_operations dma_buf_pages_fops = {
> +	.release	= dma_buf_pages_release,
> +};
> +EXPORT_SYMBOL_GPL(dma_buf_pages_fops);
> +
> +#ifdef CONFIG_ZONE_DEVICE
> +static void dma_buf_pages_destroy(struct percpu_ref *ref)
> +{
> +	struct dma_buf_pages *priv;
> +	struct dev_pagemap *pgmap;
> +
> +	pgmap = container_of(ref, struct dev_pagemap, ref);
> +	priv = container_of(pgmap, struct dma_buf_pages, pgmap);
> +
> +	if (priv->type_ops->dma_buf_pages_destroy)
> +		priv->type_ops->dma_buf_pages_destroy(priv);
> +
> +	kvfree(priv->pages);
> +	kfree(priv);
> +
> +	dma_buf_unmap_attachment(priv->attachment, priv->sgt, priv->direction);
> +	dma_buf_detach(priv->dmabuf, priv->attachment);
> +	dma_buf_put(priv->dmabuf);
> +	pci_dev_put(priv->pci_dev);
> +}
> +
> +static long dma_buf_create_pages(struct file *file,
> +				 struct dma_buf_create_pages_info *create_info)
> +{
> +	int err, fd, i, pg_idx;
> +	struct scatterlist *sg;
> +	struct dma_buf_pages *priv;
> +	struct file *new_file;
> +
> +	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
> +	if (fd < 0) {
> +		err = fd;
> +		goto out_err;
> +	}
> +
> +	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> +	if (!priv) {
> +		err = -ENOMEM;
> +		goto out_put_fd;
> +	}
> +
> +	priv->pgmap.type = MEMORY_DEVICE_PRIVATE;
> +	priv->pgmap.ops = &dma_buf_pgmap_ops;
> +	init_completion(&priv->pgmap.done);
> +
> +	/* This refcount is incremented every time a page in priv->pages is
> +	 * allocated, and decremented every time a page is freed. When
> +	 * it drops to 0, the dma_buf_pages can be destroyed. An initial ref is
> +	 * held and the dma_buf_pages is not destroyed until that is dropped.
> +	 */
> +	err = percpu_ref_init(&priv->pgmap.ref, dma_buf_pages_destroy, 0,
> +			      GFP_KERNEL);
> +	if (err)
> +		goto out_free_priv;
> +
> +	/* Initial ref to be dropped after percpu_ref_kill(). */
> +	percpu_ref_get(&priv->pgmap.ref);
> +
> +	priv->pci_dev = pci_get_domain_bus_and_slot(
> +		0, create_info->pci_bdf[0],
> +		PCI_DEVFN(create_info->pci_bdf[1], create_info->pci_bdf[2]));
> +	if (!priv->pci_dev) {
> +		err = -ENODEV;
> +		goto out_exit_percpu_ref;
> +	}
> +
> +	priv->dmabuf = dma_buf_get(create_info->dma_buf_fd);
> +	if (IS_ERR(priv->dmabuf)) {
> +		err = PTR_ERR(priv->dmabuf);
> +		goto out_put_pci_dev;
> +	}
> +
> +	if (priv->dmabuf->size % PAGE_SIZE != 0) {
> +		err = -EINVAL;
> +		goto out_put_dma_buf;
> +	}
> +
> +	priv->attachment = dma_buf_attach(priv->dmabuf, &priv->pci_dev->dev);
> +	if (IS_ERR(priv->attachment)) {
> +		err = PTR_ERR(priv->attachment);
> +		goto out_put_dma_buf;
> +	}
> +
> +	priv->num_pages = priv->dmabuf->size / PAGE_SIZE;
> +	priv->pages = kvmalloc_array(priv->num_pages, sizeof(struct page),
> +				     GFP_KERNEL);
> +	if (!priv->pages) {
> +		err = -ENOMEM;
> +		goto out_detach_dma_buf;
> +	}
> +
> +	for (i = 0; i < priv->num_pages; i++) {
> +		struct page *page = &priv->pages[i];
> +
> +		mm_zero_struct_page(page);
> +		set_page_zone(page, ZONE_DEVICE);
> +		set_page_count(page, 1);
> +		page->pgmap = &priv->pgmap;
> +	}
> +
> +	priv->direction = DMA_BIDIRECTIONAL;
> +	priv->sgt = dma_buf_map_attachment(priv->attachment, priv->direction);
> +	if (IS_ERR(priv->sgt)) {
> +		err = PTR_ERR(priv->sgt);
> +		goto out_free_pages;
> +	}
> +
> +	/* write each dma addresses from sgt to each page */
> +	pg_idx = 0;
> +	for_each_sgtable_dma_sg(priv->sgt, sg, i) {
> +		size_t len = sg_dma_len(sg);
> +		dma_addr_t dma_addr = sg_dma_address(sg);
> +
> +		BUG_ON(!PAGE_ALIGNED(len));
> +		while (len > 0) {
> +			priv->pages[pg_idx].zone_device_data = (void *)dma_addr;
> +			pg_idx++;
> +			dma_addr += PAGE_SIZE;
> +			len -= PAGE_SIZE;
> +		}
> +	}
> +
> +	new_file = anon_inode_getfile("[dma_buf_pages]", &dma_buf_pages_fops,
> +				      (void *)priv, O_RDWR | O_CLOEXEC);
> +	if (IS_ERR(new_file)) {
> +		err = PTR_ERR(new_file);
> +		goto out_unmap_dma_buf;
> +	}
> +
> +	priv->type = create_info->type;
> +	priv->create_flags = create_info->create_flags;
> +
> +	switch (priv->type) {
> +	default:
> +		err = -EINVAL;
> +		goto out_put_new_file;
> +	}
> +
> +	if (priv->type_ops->dma_buf_pages_init) {
> +		err = priv->type_ops->dma_buf_pages_init(priv, new_file);
> +		if (err)
> +			goto out_put_new_file;
> +	}
> +
> +	fd_install(fd, new_file);
> +	return fd;
> +
> +out_put_new_file:
> +	fput(new_file);
> +out_unmap_dma_buf:
> +	dma_buf_unmap_attachment(priv->attachment, priv->sgt, priv->direction);
> +out_free_pages:
> +	kvfree(priv->pages);
> +out_detach_dma_buf:
> +	dma_buf_detach(priv->dmabuf, priv->attachment);
> +out_put_dma_buf:
> +	dma_buf_put(priv->dmabuf);
> +out_put_pci_dev:
> +	pci_dev_put(priv->pci_dev);
> +out_exit_percpu_ref:
> +	percpu_ref_exit(&priv->pgmap.ref);
> +out_free_priv:
> +	kfree(priv);
> +out_put_fd:
> +	put_unused_fd(fd);
> +out_err:
> +	return err;
> +}
> +#else
> +static long dma_buf_create_pages(struct file *file,
> +				 struct dma_buf_create_pages_info *create_info)
> +{
> +	return -ENOTSUPP;
> +}
> +#endif
> +
>   #ifdef CONFIG_DEBUG_FS
>   static int dma_buf_debug_show(struct seq_file *s, void *unused)
>   {
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index 3f31baa3293f..5789006180ea 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -540,6 +540,36 @@ struct dma_buf_export_info {
>   	void *priv;
>   };
>   
> +struct dma_buf_pages;
> +
> +struct dma_buf_pages_type_ops {
> +	int (*dma_buf_pages_init)(struct dma_buf_pages *priv,
> +				  struct file *file);
> +	void (*dma_buf_pages_release)(struct dma_buf_pages *priv,
> +				      struct file *file);
> +	void (*dma_buf_pages_destroy)(struct dma_buf_pages *priv);
> +	void (*dma_buf_page_free)(struct dma_buf_pages *priv,
> +				  struct page *page);
> +};
> +
> +struct dma_buf_pages {
> +	/* fields for dmabuf */
> +	struct dma_buf *dmabuf;
> +	struct dma_buf_attachment *attachment;
> +	struct sg_table *sgt;
> +	struct pci_dev *pci_dev;
> +	enum dma_data_direction direction;
> +
> +	/* fields for dma-buf pages */
> +	size_t num_pages;
> +	struct page *pages;
> +	struct dev_pagemap pgmap;
> +
> +	unsigned int type;
> +	const struct dma_buf_pages_type_ops *type_ops;
> +	__u64 create_flags;
> +};
> +
>   /**
>    * DEFINE_DMA_BUF_EXPORT_INFO - helper macro for exporters
>    * @name: export-info name
> @@ -631,4 +661,64 @@ int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map);
>   void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map);
>   int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map);
>   void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map);
> +
> +#ifdef CONFIG_DMA_SHARED_BUFFER
> +extern const struct file_operations dma_buf_pages_fops;
> +extern const struct dev_pagemap_ops dma_buf_pgmap_ops;
> +
> +static inline bool is_dma_buf_pages_file(struct file *file)
> +{
> +	return file->f_op == &dma_buf_pages_fops;
> +}
> +
> +static inline bool is_dma_buf_page(struct page *page)
> +{
> +	return (is_zone_device_page(page) && page->pgmap &&
> +		page->pgmap->ops == &dma_buf_pgmap_ops);
> +}
> +
> +static inline dma_addr_t dma_buf_page_to_dma_addr(struct page *page)
> +{
> +	return (dma_addr_t)page->zone_device_data;
> +}
> +
> +static inline int dma_buf_map_sg(struct device *dev, struct scatterlist *sg,
> +				 int nents, enum dma_data_direction dir)
> +{
> +	struct scatterlist *s;
> +	int i;
> +
> +	for_each_sg(sg, s, nents, i) {
> +		struct page *pg = sg_page(s);
> +
> +		s->dma_address = dma_buf_page_to_dma_addr(pg);
> +		sg_dma_len(s) = s->length;
> +	}
> +
> +	return nents;
> +}
> +#else
> +static inline bool is_dma_buf_page(struct page *page)
> +{
> +	return false;
> +}
> +
> +static inline bool is_dma_buf_pages_file(struct file *file)
> +{
> +	return false;
> +}
> +
> +static inline dma_addr_t dma_buf_page_to_dma_addr(struct page *page)
> +{
> +	return 0;
> +}
> +
> +static inline int dma_buf_map_sg(struct device *dev, struct scatterlist *sg,
> +				 int nents, enum dma_data_direction dir)
> +{
> +	return 0;
> +}
> +#endif
> +
> +
>   #endif /* __DMA_BUF_H__ */
> diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
> index 5a6fda66d9ad..d0f63a2ab7e4 100644
> --- a/include/uapi/linux/dma-buf.h
> +++ b/include/uapi/linux/dma-buf.h
> @@ -179,4 +179,13 @@ struct dma_buf_import_sync_file {
>   #define DMA_BUF_IOCTL_EXPORT_SYNC_FILE	_IOWR(DMA_BUF_BASE, 2, struct dma_buf_export_sync_file)
>   #define DMA_BUF_IOCTL_IMPORT_SYNC_FILE	_IOW(DMA_BUF_BASE, 3, struct dma_buf_import_sync_file)
>   
> +struct dma_buf_create_pages_info {
> +	__u8 pci_bdf[3];
> +	__s32 dma_buf_fd;
> +	__u32 type;
> +	__u64 create_flags;
> +};
> +
> +#define DMA_BUF_CREATE_PAGES	_IOW(DMA_BUF_BASE, 4, struct dma_buf_create_pages_info)
> +
>   #endif



More information about the dri-devel mailing list