[PATCH 5/5] udmabuf: remove folio pin list

Huan Yang link at vivo.com
Thu Aug 1 10:45:09 UTC 2024


Currently, udmabuf handles folios by creating an unpin list that records
each folio obtained from the list and unpins them on release. Maintaining
this approach requires a number of extra data structures.

However, maintaining this type of data structure requires a significant
amount of memory and traversing the list is a substantial overhead,
which is not friendly to the CPU cache, TLB, and so on.

Therefore, this patch removes the relationship between the folio and its
offset in the linear address mapping.

As an alternative, udmabuf only tracks the folio structures and splits
them into individual pages when needed, by traversing them at the
required locations (mmap/vmap, sg table).

So, udmabuf's folios array only saves the folio structs, and nr_folios
is added to record how many folios are saved in it.

The offsets array is removed and replaced by a per-item offset and size.
Because a memfd create item may start at an offset, we must select the
correct page within the folio.

So, when setting up the sg_table, we must begin each item at its correct
start offset, and then set each folio's pages into the sg_table.

The item offset and size arrays each hold only as many entries as the
create list, so their memory usage will not be too large.

By doing this, we can avoid the overhead of the udmabuf_folio structure
and the performance loss of traversing the list during unpinning.

Signed-off-by: Huan Yang <link at vivo.com>
---
 drivers/dma-buf/udmabuf.c | 149 +++++++++++++++++---------------------
 1 file changed, 66 insertions(+), 83 deletions(-)

diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 677ebb2d462f..1106e0b1e746 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -25,17 +25,19 @@ module_param(size_limit_mb, int, 0644);
 MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. Default is 64.");
 
 struct udmabuf {
+	// all page's count, pagecount * PAGE_SIZE is the udmabuf's size
 	pgoff_t pagecount;
+
+	// folios array only point to each folio, do not duplicate set.
 	struct folio **folios;
+	// folios array's number
+	pgoff_t nr_folios;
+
 	struct sg_table *sg;
 	struct miscdevice *device;
-	pgoff_t *offsets;
-	struct list_head unpin_list;
-};
 
-struct udmabuf_folio {
-	struct folio *folio;
-	struct list_head list;
+	pgoff_t *item_offset;
+	size_t *item_size;
 };
 
 static struct sg_table *udmabuf_get_sg_table(struct device *dev,
@@ -118,7 +120,10 @@ static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
 	struct udmabuf *ubuf = buf->priv;
 	struct sg_table *sg;
 	struct scatterlist *sgl;
-	unsigned int i = 0;
+	struct folio *folio = NULL;
+	size_t fsize, foffset;
+	unsigned int i = 0, item_idx = 0, findex = 0;
+	size_t cur_size, item_size;
 	int ret;
 
 	sg = kzalloc(sizeof(*sg), GFP_KERNEL);
@@ -129,9 +134,33 @@ static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
 	if (ret < 0)
 		goto err_alloc;
 
-	for_each_sg(sg->sgl, sgl, ubuf->pagecount, i)
-		sg_set_folio(sgl, ubuf->folios[i], PAGE_SIZE,
-			     ubuf->offsets[i]);
+	cur_size = 0;
+	item_size = ubuf->item_size[0];
+	foffset = ubuf->item_offset[0];
+	folio = ubuf->folios[0];
+	fsize = folio_size(folio);
+
+	for_each_sg(sg->sgl, sgl, ubuf->pagecount, i) {
+		sg_set_folio(sgl, folio, PAGE_SIZE, foffset);
+		foffset += PAGE_SIZE;
+		cur_size += PAGE_SIZE;
+
+		// move to next folio.
+		if (foffset == fsize) {
+			++findex;
+			folio = ubuf->folios[findex];
+			fsize = folio_size(folio);
+			foffset = 0;
+		}
+
+		// if reach to next item, must check the start offset.
+		if (cur_size == item_size) {
+			++item_idx;
+			foffset = ubuf->item_offset[item_idx];
+			item_size = ubuf->item_size[item_idx];
+			cur_size = 0;
+		}
+	}
 
 	// if dev is NULL, no need to sync.
 	if (!dev)
@@ -203,34 +232,6 @@ static void unmap_udmabuf(struct dma_buf_attachment *at,
 	return put_sg_table(at->dev, sg, direction);
 }
 
-static void unpin_all_folios(struct list_head *unpin_list)
-{
-	struct udmabuf_folio *ubuf_folio;
-
-	while (!list_empty(unpin_list)) {
-		ubuf_folio = list_first_entry(unpin_list,
-					      struct udmabuf_folio, list);
-		unpin_folio(ubuf_folio->folio);
-
-		list_del(&ubuf_folio->list);
-		kfree(ubuf_folio);
-	}
-}
-
-static int add_to_unpin_list(struct list_head *unpin_list,
-			     struct folio *folio)
-{
-	struct udmabuf_folio *ubuf_folio;
-
-	ubuf_folio = kzalloc(sizeof(*ubuf_folio), GFP_KERNEL);
-	if (!ubuf_folio)
-		return -ENOMEM;
-
-	ubuf_folio->folio = folio;
-	list_add_tail(&ubuf_folio->list, unpin_list);
-	return 0;
-}
-
 static void release_udmabuf(struct dma_buf *buf)
 {
 	struct udmabuf *ubuf = buf->priv;
@@ -239,8 +240,9 @@ static void release_udmabuf(struct dma_buf *buf)
 	if (ubuf->sg)
 		put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
 
-	unpin_all_folios(&ubuf->unpin_list);
-	kvfree(ubuf->offsets);
+	unpin_folios(ubuf->folios, ubuf->nr_folios);
+	kfree(ubuf->item_offset);
+	kfree(ubuf->item_size);
 	kvfree(ubuf->folios);
 	kfree(ubuf);
 }
@@ -338,19 +340,18 @@ static long udmabuf_create(struct miscdevice *device,
 			   struct udmabuf_create_list *head,
 			   struct udmabuf_create_item *list)
 {
-	pgoff_t pgoff, pgcnt, pglimit, pgbuf = 0;
+	pgoff_t pgoff, pgcnt, pglimit;
 	long nr_folios, ret = -EINVAL;
 	struct file *memfd = NULL;
 	struct folio **folios;
 	struct udmabuf *ubuf;
-	u32 i, j, k, flags;
+	u32 i, flags;
 	loff_t end;
 
 	ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
 	if (!ubuf)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&ubuf->unpin_list);
 	pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT;
 	for (i = 0; i < head->count; i++) {
 		if (!IS_ALIGNED(list[i].offset, PAGE_SIZE))
@@ -365,20 +366,27 @@ static long udmabuf_create(struct miscdevice *device,
 	if (!ubuf->pagecount)
 		goto err;
 
-	ubuf->folios = kvmalloc_array(ubuf->pagecount, sizeof(*ubuf->folios),
-				      GFP_KERNEL);
-	if (!ubuf->folios) {
+	ubuf->item_size =
+		kmalloc_array(head->count, sizeof(size_t), GFP_KERNEL);
+	if (!ubuf->item_size)
+		return -ENOMEM;
+
+	ubuf->item_offset =
+		kmalloc_array(head->count, sizeof(pgoff_t), GFP_KERNEL);
+	if (!ubuf->item_offset) {
 		ret = -ENOMEM;
 		goto err;
 	}
-	ubuf->offsets =
-		kvcalloc(ubuf->pagecount, sizeof(*ubuf->offsets), GFP_KERNEL);
-	if (!ubuf->offsets) {
+
+	ubuf->folios = kvmalloc_array(ubuf->pagecount, sizeof(*ubuf->folios),
+				      GFP_KERNEL);
+	if (!ubuf->folios) {
 		ret = -ENOMEM;
 		goto err;
 	}
+	folios = ubuf->folios;
 
-	pgbuf = 0;
+	nr_folios = 0;
 	for (i = 0; i < head->count; i++) {
 		memfd = fget(list[i].memfd);
 		ret = check_memfd_seals(memfd);
@@ -386,49 +394,24 @@ static long udmabuf_create(struct miscdevice *device,
 			goto err;
 
 		pgcnt = list[i].size >> PAGE_SHIFT;
-		folios = kvmalloc_array(pgcnt, sizeof(*folios), GFP_KERNEL);
-		if (!folios) {
-			ret = -ENOMEM;
-			goto err;
-		}
 
 		end = list[i].offset + (pgcnt << PAGE_SHIFT) - 1;
 		ret = memfd_pin_folios(memfd, list[i].offset, end,
 				       folios, pgcnt, &pgoff);
 		if (ret <= 0) {
-			kvfree(folios);
-			if (!ret)
-				ret = -EINVAL;
+			ret = ret ?: -EINVAL;
 			goto err;
 		}
+		ubuf->item_size[i] = list[i].size;
+		ubuf->item_offset[i] = pgoff;
 
-		nr_folios = ret;
-		pgoff >>= PAGE_SHIFT;
-		for (j = 0, k = 0; j < pgcnt; j++) {
-			ubuf->folios[pgbuf] = folios[k];
-			ubuf->offsets[pgbuf] = pgoff << PAGE_SHIFT;
-
-			if (j == 0 || ubuf->folios[pgbuf-1] != folios[k]) {
-				ret = add_to_unpin_list(&ubuf->unpin_list,
-							folios[k]);
-				if (ret < 0) {
-					kfree(folios);
-					goto err;
-				}
-			}
-
-			pgbuf++;
-			if (++pgoff == folio_nr_pages(folios[k])) {
-				pgoff = 0;
-				if (++k == nr_folios)
-					break;
-			}
-		}
+		nr_folios += ret;
+		folios += ret;
 
-		kvfree(folios);
 		fput(memfd);
 		memfd = NULL;
 	}
+	ubuf->nr_folios = nr_folios;
 
 	flags = head->flags & UDMABUF_FLAGS_CLOEXEC ? O_CLOEXEC : 0;
 	ret = export_udmabuf(ubuf, device, flags);
@@ -440,8 +423,8 @@ static long udmabuf_create(struct miscdevice *device,
 err:
 	if (memfd)
 		fput(memfd);
-	unpin_all_folios(&ubuf->unpin_list);
-	kvfree(ubuf->offsets);
+	kfree(ubuf->item_size);
+	kfree(ubuf->item_offset);
 	kvfree(ubuf->folios);
 	kfree(ubuf);
 	return ret;
-- 
2.45.2



More information about the dri-devel mailing list