[PATCH v3 5/5] udmabuf: remove udmabuf_folio
Kasireddy, Vivek
vivek.kasireddy at intel.com
Sat Aug 17 01:05:29 UTC 2024
Hi Huan,
>
> Currently, udmabuf handles folio by creating an unpin list to record
> each folio obtained from the list and unpinning them when released. To
> maintain this approach, many data structures have been established.
>
> However, maintaining this type of data structure requires a significant
> amount of memory and traversing the list is a substantial overhead,
> which is not friendly to the CPU cache.
>
> Considering that during creation, we arranged the folio array in the
> order of pin and set the offset according to pgcnt.
>
> We actually don't need to use unpin_list to unpin during release.
> Instead, we can iterate through the folios array during release and
> unpin any folio that is different from the ones previously accessed.
No, that won't work because iterating the folios array doesn't tell you
anything about how many times a folio was pinned (via memfd_pin_folios()),
as a folio could be part of multiple ranges.
For example, if userspace provides ranges 64..128 and 256..512 (assuming
these are 4k sized subpage offsets and we have a 2MB hugetlb folio), then
the same folio would cover both ranges and there would be 2 entries for
this folio in unpin_list. But, with your logic, we'd be erroneously unpinning
it only once.
Not sure if there are any great solutions available to address this situation,
but one option I can think of is to convert unpin_list to unpin array (dynamically
resized with krealloc?) and track its length separately. Or, as suggested earlier,
another way is to not use unpin_list for memfds backed by shmem, but I suspect
this may not work if THP is enabled.
Thanks,
Vivek
>
> By this, not only saves the overhead of the udmabuf_folio data structure
> but also makes array access more cache-friendly.
>
> Signed-off-by: Huan Yang <link at vivo.com>
> ---
> drivers/dma-buf/udmabuf.c | 68 +++++++++++++++++----------------------
> 1 file changed, 30 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
> index 8f9cb0e2e71a..1e7f46c33d1a 100644
> --- a/drivers/dma-buf/udmabuf.c
> +++ b/drivers/dma-buf/udmabuf.c
> @@ -26,16 +26,19 @@ MODULE_PARM_DESC(size_limit_mb, "Max size of a
> dmabuf, in megabytes. Default is
>
> struct udmabuf {
> pgoff_t pagecount;
> - struct folio **folios;
> struct sg_table *sg;
> struct miscdevice *device;
> + struct folio **folios;
> + /**
> + * offset in folios array's folio, byte unit.
> + * udmabuf can use either shmem or hugetlb pages, an array based
> on
> + * pages may not be suitable.
> + * Especially when HVO is enabled, the tail page will be released,
> + * so our reference to the page will no longer be correct.
> + * Hence, it's necessary to record the offset in order to reference
> + * the correct PFN within the folio.
> + */
> pgoff_t *offsets;
> - struct list_head unpin_list;
> -};
> -
> -struct udmabuf_folio {
> - struct folio *folio;
> - struct list_head list;
> };
>
> static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct
> *vma)
> @@ -160,32 +163,28 @@ static void unmap_udmabuf(struct
> dma_buf_attachment *at,
> return put_sg_table(at->dev, sg, direction);
> }
>
> -static void unpin_all_folios(struct list_head *unpin_list)
> +/**
> + * unpin_all_folios: unpin each folio we pinned in create
> + * The udmabuf set all folio in folios and pinned it, but for large folio,
> + * We may have only used a small portion of the physical in the folio.
> + * we will repeatedly, sequentially set the folio into the array to ensure
> + * that the offset can index the correct folio at the corresponding index.
> + * Hence, we only need to unpin the first iterred folio.
> + */
> +static void unpin_all_folios(struct udmabuf *ubuf)
> {
> - struct udmabuf_folio *ubuf_folio;
> -
> - while (!list_empty(unpin_list)) {
> - ubuf_folio = list_first_entry(unpin_list,
> - struct udmabuf_folio, list);
> - unpin_folio(ubuf_folio->folio);
> -
> - list_del(&ubuf_folio->list);
> - kfree(ubuf_folio);
> - }
> -}
> + pgoff_t pg;
> + struct folio *last = NULL;
>
> -static int add_to_unpin_list(struct list_head *unpin_list,
> - struct folio *folio)
> -{
> - struct udmabuf_folio *ubuf_folio;
> + for (pg = 0; pg < ubuf->pagecount; pg++) {
> + struct folio *tmp = ubuf->folios[pg];
>
> - ubuf_folio = kzalloc(sizeof(*ubuf_folio), GFP_KERNEL);
> - if (!ubuf_folio)
> - return -ENOMEM;
> + if (tmp == last)
> + continue;
>
> - ubuf_folio->folio = folio;
> - list_add_tail(&ubuf_folio->list, unpin_list);
> - return 0;
> + last = tmp;
> + unpin_folio(tmp);
> + }
> }
>
> static void release_udmabuf(struct dma_buf *buf)
> @@ -196,7 +195,7 @@ static void release_udmabuf(struct dma_buf *buf)
> if (ubuf->sg)
> put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
>
> - unpin_all_folios(&ubuf->unpin_list);
> + unpin_all_folios(ubuf);
> kvfree(ubuf->offsets);
> kvfree(ubuf->folios);
> kfree(ubuf);
> @@ -308,7 +307,6 @@ static long udmabuf_create(struct miscdevice
> *device,
> if (!ubuf)
> return -ENOMEM;
>
> - INIT_LIST_HEAD(&ubuf->unpin_list);
> pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT;
> for (i = 0; i < head->count; i++) {
> if (!IS_ALIGNED(list[i].offset, PAGE_SIZE))
> @@ -366,12 +364,6 @@ static long udmabuf_create(struct miscdevice
> *device,
> u32 k;
> long fsize = folio_size(folios[j]);
>
> - ret = add_to_unpin_list(&ubuf->unpin_list, folios[j]);
> - if (ret < 0) {
> - kfree(folios);
> - goto err;
> - }
> -
> for (k = pgoff; k < fsize; k += PAGE_SIZE) {
> ubuf->folios[pgbuf] = folios[j];
> ubuf->offsets[pgbuf] = k;
> @@ -399,7 +391,7 @@ static long udmabuf_create(struct miscdevice
> *device,
> err:
> if (memfd)
> fput(memfd);
> - unpin_all_folios(&ubuf->unpin_list);
> + unpin_all_folios(ubuf);
> kvfree(ubuf->offsets);
> kvfree(ubuf->folios);
> kfree(ubuf);
> --
> 2.45.2
More information about the dri-devel
mailing list