[PATCH 1/5] udmabuf: cancel mmap page fault, direct map it
Huan Yang
link at vivo.com
Thu Aug 1 11:08:13 UTC 2024
On 2024/8/1 18:50, Christian König wrote:
> Am 01.08.24 um 12:45 schrieb Huan Yang:
>> The current udmabuf mmap uses a page fault mechanism to populate the
>> vma.
>>
>> However, udmabuf has already obtained and pinned the folios by the
>> time creation completes. This means the physical memory has already
>> been acquired, rather than being accessed on demand; the page fault
>> path only saves some page table memory.
>>
>> As a result, the page fault mechanism no longer serves its purpose
>> of demand paging. Every first access to a mapped virtual address must
>> trap into kernel mode so the fault handler can fill in the PTE, which
>> means userspace pays one fault for every page it touches.
>>
>> For a large udmabuf, this is a considerable overhead.
>>
>> This patch therefore removes the page fault path from mmap and instead
>> populates the mapping directly when mmap is invoked.
>>
>> Signed-off-by: Huan Yang <link at vivo.com>
>> ---
>> drivers/dma-buf/udmabuf.c | 70 ++++++++++++++++++++++-----------------
>> 1 file changed, 39 insertions(+), 31 deletions(-)
>>
>> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
>> index 047c3cd2ceff..d69aeada7367 100644
>> --- a/drivers/dma-buf/udmabuf.c
>> +++ b/drivers/dma-buf/udmabuf.c
>> @@ -38,36 +38,39 @@ struct udmabuf_folio {
>> struct list_head list;
>> };
>> -static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
>> -{
>> - struct vm_area_struct *vma = vmf->vma;
>> - struct udmabuf *ubuf = vma->vm_private_data;
>> - pgoff_t pgoff = vmf->pgoff;
>> - unsigned long pfn;
>> -
>> - if (pgoff >= ubuf->pagecount)
>> - return VM_FAULT_SIGBUS;
>> -
>> - pfn = folio_pfn(ubuf->folios[pgoff]);
>> - pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
>> -
>> - return vmf_insert_pfn(vma, vmf->address, pfn);
>> -}
>> -
>> -static const struct vm_operations_struct udmabuf_vm_ops = {
>> - .fault = udmabuf_vm_fault,
>> -};
>> +static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
>> +				     enum dma_data_direction direction);
>> static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
>> {
>> struct udmabuf *ubuf = buf->priv;
>> + struct sg_table *table = ubuf->sg;
>> + unsigned long addr = vma->vm_start;
>> + struct sg_page_iter piter;
>> + int ret;
>> if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
>> return -EINVAL;
>> - vma->vm_ops = &udmabuf_vm_ops;
>> - vma->vm_private_data = ubuf;
>> - vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
>> + if (!table) {
>> + table = get_sg_table(NULL, buf, 0);
>> + if (IS_ERR(table))
>> + return PTR_ERR(table);
>> + ubuf->sg = table;
>> + }
>> +
>> + for_each_sgtable_page(table, &piter, vma->vm_pgoff) {
>
> That might not work correctly. We intentionally remove the pages from
> the sgtable when it is shared between devices.
>
> In addition, the sgtable is *not* a page container, but rather a DMA
> address container. So this is also a rather bad idea from the design
> side.
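
To make the distinction concrete: after dma_map_sgtable() the entries
describe DMA addresses (possibly coalesced, so nents can differ from
orig_nents), and only the DMA-side iterator is safe to walk; the
page-side iterator used in the patch assumes the table still carries
struct page pointers. The helper below is purely illustrative, not from
this patch:

    #include <linux/dma-mapping.h>
    #include <linux/printk.h>
    #include <linux/scatterlist.h>

    static void walk_mapped_table(struct sg_table *sgt)
    {
            struct scatterlist *sg;
            int i;

            /* Walk what the table actually carries after dma_map_sgtable(). */
            for_each_sgtable_dma_sg(sgt, sg, i) {
                    dma_addr_t daddr = sg_dma_address(sg);
                    unsigned int dlen = sg_dma_len(sg);

                    pr_debug("dma segment: %pad + %u\n", &daddr, dlen);
            }

            /*
             * for_each_sgtable_page(), by contrast, dereferences the
             * struct page pointers stored at creation time and is only
             * valid while those pages are still present in the table.
             */
    }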
Sorry about that, and about the same ops in patches 1, 3 and 4. I was not
aware of this before. The idea of doing this in mmap/vmap simply followed
system_heap and the other heaps I learned from. Good to learn better.

BTW, if an sgtable is the wrong place to maintain pages, maybe we need to
set up both a page array (order-0 pages) and a folio array (only the
folios, used for unpinning)?

Otherwise, mapping pages to the vm offset in the vma solely through the
folio array is quite challenging.

Moreover, even done this way, the memory overhead is smaller than the
current unpin list.

Thanks for your correction. :)
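
Roughly, the split I have in mind (a sketch only; the struct layout and
function names below are illustrative, not from any posted patch):

    #include <linux/mm.h>

    /* Two views of the same pinned memory (hypothetical layout). */
    struct udmabuf_backing {
            pgoff_t pagecount;
            struct page **pages;    /* order-0 view, used for mapping */
            struct folio **folios;  /* folio view, used only to unpin */
    };

    static int udmabuf_mmap_pages(struct udmabuf_backing *ub,
                                  struct vm_area_struct *vma)
    {
            unsigned long addr = vma->vm_start;
            pgoff_t pg = vma->vm_pgoff;
            int ret;

            /* Populate the whole range up front, one order-0 page at a time. */
            for (; addr < vma->vm_end; addr += PAGE_SIZE, pg++) {
                    if (pg >= ub->pagecount)
                            return -EINVAL;

                    ret = remap_pfn_range(vma, addr,
                                          page_to_pfn(ub->pages[pg]),
                                          PAGE_SIZE, vma->vm_page_prot);
                    if (ret)
                            return ret;
            }

            return 0;
    }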
>
> Regards,
> Christian.
>
>> + struct page *page = sg_page_iter_page(&piter);
>> +
>> + ret = remap_pfn_range(vma, addr, page_to_pfn(page), PAGE_SIZE,
>> + vma->vm_page_prot);
>> + if (ret)
>> + return ret;
>> + addr += PAGE_SIZE;
>> + if (addr >= vma->vm_end)
>> + return 0;
>> + }
>> +
>> return 0;
>> }
>> @@ -126,6 +129,10 @@ static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
>> sg_set_folio(sgl, ubuf->folios[i], PAGE_SIZE,
>> ubuf->offsets[i]);
>> + // if dev is NULL, no need to sync.
>> + if (!dev)
>> + return sg;
>> +
>> ret = dma_map_sgtable(dev, sg, direction, 0);
>> if (ret < 0)
>> goto err_map;
>> @@ -206,20 +213,21 @@ static int begin_cpu_udmabuf(struct dma_buf *buf,
>> {
>> struct udmabuf *ubuf = buf->priv;
>> struct device *dev = ubuf->device->this_device;
>> - int ret = 0;
>> + struct sg_table *sg;
>> - if (!ubuf->sg) {
>> - ubuf->sg = get_sg_table(dev, buf, direction);
>> - if (IS_ERR(ubuf->sg)) {
>> - ret = PTR_ERR(ubuf->sg);
>> - ubuf->sg = NULL;
>> - }
>> - } else {
>> + if (ubuf->sg) {
>> dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents,
>> direction);
>> + return 0;
>> }
>> - return ret;
>> + sg = get_sg_table(dev, buf, direction);
>> + if (IS_ERR(sg))
>> + return PTR_ERR(sg);
>> +
>> + ubuf->sg = sg;
>> +
>> + return 0;
>> }
>> static int end_cpu_udmabuf(struct dma_buf *buf,
>
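
For context on the overhead the patch targets: a udmabuf is created from
a sealed memfd via the UDMABUF_CREATE ioctl in <linux/udmabuf.h> and then
mmap()ed through the returned dma-buf fd. With the fault-based path, the
first touch of every page traps into the kernel; with the patch, the
whole range is populated at mmap() time. A minimal userspace sketch
(error handling omitted):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>
    #include <linux/udmabuf.h>

    int main(void)
    {
            const size_t size = 128UL << 20;    /* large buffer: many pages */

            /* udmabuf requires a memfd sealed against shrinking. */
            int memfd = memfd_create("src", MFD_ALLOW_SEALING);
            ftruncate(memfd, size);
            fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK);

            int devfd = open("/dev/udmabuf", O_RDWR);
            struct udmabuf_create create = {
                    .memfd  = memfd,
                    .offset = 0,
                    .size   = size,
            };
            int buf = ioctl(devfd, UDMABUF_CREATE, &create); /* dma-buf fd */

            uint8_t *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                              MAP_SHARED, buf, 0);

            /*
             * With the fault handler, each page touched here faults once;
             * with this patch, the range is already populated by mmap().
             */
            memset(p, 0, size);

            munmap(p, size);
            return 0;
    }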