[PATCH v2 2/3] fbdev: Track deferred-I/O pages in pageref struct
Sam Ravnborg
sam at ravnborg.org
Mon Apr 25 18:17:07 UTC 2022
Hi Thomas,
a few ramblings below - just my thoughts while trying to understand
the code, especially since I have looked at it before.
Sam
On Mon, Apr 25, 2022 at 01:27:50PM +0200, Thomas Zimmermann wrote:
> Store the per-page state for fbdev's deferred I/O in struct
> fb_deferred_io_pageref. Maintain a list of pagerefs for the pages
> that have to be written back to video memory. Update all affected
> drivers.
>
> As with pages before, fbdev acquires a pageref when an mmaped page
> of the framebuffer is being written to. It holds the pageref in a
> list of all currently written pagerefs until it flushes the written
> pages to video memory. Writeback occurs periodically. After writeback
> fbdev releases all pagerefs and builds up a new dirty list until the
> next writeback occurs.
>
> Using pagerefs has a number of benefits.
>
> For pages of the framebuffer, the deferred I/O code used struct
> page.lru as an entry into the list of dirty pages. The lru field is
> owned by the page cache, which makes deferred I/O incompatible with
> some memory pages (most notably pages from DRM's GEM SHMEM allocator).
> struct fb_deferred_io_pageref now provides an entry into a list of
> dirty framebuffer pages, freeing lru for use with the page cache.
>
> Drivers also assumed that struct page.index is the page offset into
> the framebuffer. This is not true for DRM buffers, which are located
> at various offsets within a mapped area. struct fb_deferred_io_pageref
> explicitly stores an offset into the framebuffer. struct page.index
> is now only the page offset into the mapped area.
>
> These changes will allow DRM to use fbdev deferred I/O without an
> intermediate shadow buffer.
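To check that I read the offset/index split right - a small worked
example with my own numbers, not from the patch: assume the framebuffer
starts two pages into the mapped area and a write faults on the third
framebuffer page. Then:

	/* hypothetical: framebuffer starts 2 pages into the mapped area */
	page->index     == 4                /* page offset into the mapped area */
	pageref->offset == 2 * PAGE_SIZE    /* byte offset into the framebuffer */

For the classic fbdev drivers the two coincide, so nothing should change
for them.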
>
> v2:
> * minor fixes in commit message
>
> Signed-off-by: Thomas Zimmermann <tzimmermann at suse.de>
> Reviewed-by: Javier Martinez Canillas <javierm at redhat.com>
> ---
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
> index 02a4a4fa3da3..db02e17e12b6 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
> @@ -322,12 +322,13 @@ static void vmw_deferred_io(struct fb_info *info,
> struct vmw_fb_par *par = info->par;
> unsigned long start, end, min, max;
> unsigned long flags;
> - struct page *page;
> + struct fb_deferred_io_pageref *pageref;
> int y1, y2;
>
> min = ULONG_MAX;
> max = 0;
> - list_for_each_entry(page, pagelist, lru) {
> + list_for_each_entry(pageref, pagelist, list) {
> + struct page *page = pageref->page;
> start = page->index << PAGE_SHIFT;
> end = start + PAGE_SIZE - 1;
> min = min(min, start);
This is the same change as in all the other deferred_io drivers.
We now traverse pageref->list, where pagelist points to the head.
It took me a while to understand that pagelist points to a list of
fb_deferred_io_pageref and not a list of pages.
It would have helped me had this been renamed, s/pagelist/pagereflist/.
If you see no point in this then just ignore my comment; I have not
stared at kernel code for a while and am thus easy to confuse.
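Just to illustrate, with that rename the loop above would read something
like:

	list_for_each_entry(pageref, pagereflist, list) {
		struct page *page = pageref->page;
		...
	}

which makes it obvious at the call site that we iterate pagerefs, not
pages.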
> diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c
> index 6924d489a289..a03b9c64fc61 100644
> --- a/drivers/video/fbdev/core/fb_defio.c
> +++ b/drivers/video/fbdev/core/fb_defio.c
> @@ -36,6 +36,60 @@ static struct page *fb_deferred_io_page(struct fb_info *info, unsigned long offs
> return page;
> }
>
> +static struct fb_deferred_io_pageref *fb_deferred_io_pageref_get(struct fb_info *info,
> + unsigned long offset,
> + struct page *page)
> +{
> + struct fb_deferred_io *fbdefio = info->fbdefio;
> + struct list_head *pos = &fbdefio->pagelist;
> + unsigned long pgoff = offset >> PAGE_SHIFT;
> + struct fb_deferred_io_pageref *pageref, *cur;
> +
> + if (WARN_ON_ONCE(pgoff >= info->npagerefs))
> + return NULL; /* incorrect allocation size */
> +
> + /* 1:1 mapping between pageref and page offset */
> + pageref = &info->pagerefs[pgoff];
> +
> + /*
> + * This check is to catch the case where a new process could start
> + * writing to the same page through a new PTE. This new access
> + * can cause a call to .page_mkwrite even if the original process'
> + * PTE is marked writable.
> + */
> + if (!list_empty(&pageref->list))
> + goto pageref_already_added;
> +
> + pageref->page = page;
> + pageref->offset = pgoff << PAGE_SHIFT;
> +
> + if (unlikely(fbdefio->sort_pagelist)) {
> + /*
> + * We loop through the list of pagerefs before adding, in
> + * order to keep the pagerefs sorted. This has significant
> + * overhead of O(n^2) with n being the number of written
> + * pages. If possible, drivers should try to work with
> + * unsorted page lists instead.
> + */
> + list_for_each_entry(cur, &info->fbdefio->pagelist, list) {
> + if (cur > pageref)
> + break;
> + }
> + pos = &cur->list;
> + }
> +
> + list_add_tail(&pageref->list, pos);
> +
> +pageref_already_added:
> + return pageref;
> +}
> +
> +static void fb_deferred_io_pageref_put(struct fb_deferred_io_pageref *pageref,
> + struct fb_info *info)
> +{
> + list_del_init(&pageref->list);
> +}
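The 1:1 mapping keeps the lookup pleasingly simple. If I follow the
code, a write faulting at byte offset 0x3000 into the framebuffer
(with 4 KiB pages) gives:

	pgoff = 0x3000 >> PAGE_SHIFT;    /* == 3 */
	pageref = &info->pagerefs[3];    /* O(1), no list walk for the lookup */

so only the optional sorted insert costs more than constant time.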
> +
> /* this is to find and return the vmalloc-ed fb pages */
> static vm_fault_t fb_deferred_io_fault(struct vm_fault *vmf)
> {
> @@ -59,7 +113,7 @@ static vm_fault_t fb_deferred_io_fault(struct vm_fault *vmf)
> printk(KERN_ERR "no mapping available\n");
>
> BUG_ON(!page->mapping);
> - page->index = vmf->pgoff;
> + page->index = vmf->pgoff; /* for page_mkclean() */
>
> vmf->page = page;
> return 0;
> @@ -95,7 +149,11 @@ static vm_fault_t fb_deferred_io_mkwrite(struct vm_fault *vmf)
> struct page *page = vmf->page;
> struct fb_info *info = vmf->vma->vm_private_data;
> struct fb_deferred_io *fbdefio = info->fbdefio;
> - struct list_head *pos = &fbdefio->pagelist;
> + struct fb_deferred_io_pageref *pageref;
> + unsigned long offset;
> + vm_fault_t ret;
> +
> + offset = (vmf->address - vmf->vma->vm_start);
>
> /* this is a callback we get when userspace first tries to
> write to the page. we schedule a workqueue. that workqueue
> @@ -112,6 +170,12 @@ static vm_fault_t fb_deferred_io_mkwrite(struct vm_fault *vmf)
> if (fbdefio->first_io && list_empty(&fbdefio->pagelist))
> fbdefio->first_io(info);
>
> + pageref = fb_deferred_io_pageref_get(info, offset, page);
Compared to the old code we now do all the sorting and list handling
without the page locked, which seems like a big change.
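If I read the diff right, the ordering changes roughly like this:

	/* old */
	mutex_lock(&fbdefio->lock);
	lock_page(page);
	/* list_empty() check, sort, list_add_tail() under the page lock */

	/* new */
	mutex_lock(&fbdefio->lock);
	/* list_empty() check, sort, list_add_tail() in pageref_get() */
	lock_page(pageref->page);

The list itself is protected by fbdefio->lock in both cases, so perhaps
only the window relative to page_mkclean() moves - but a sentence about
this in the commit message would help.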
> + if (WARN_ON_ONCE(!pageref)) {
> + ret = VM_FAULT_OOM;
> + goto err_mutex_unlock;
> + }
> +
> /*
> * We want the page to remain locked from ->page_mkwrite until
> * the PTE is marked dirty to avoid page_mkclean() being called
> @@ -120,47 +184,17 @@ static vm_fault_t fb_deferred_io_mkwrite(struct vm_fault *vmf)
> * Do this by locking the page here and informing the caller
> * about it with VM_FAULT_LOCKED.
> */
> - lock_page(page);
> -
> - /*
> - * This check is to catch the case where a new process could start
> - * writing to the same page through a new PTE. This new access
> - * can cause a call to .page_mkwrite even if the original process'
> - * PTE is marked writable.
> - *
> - * TODO: The lru field is owned by the page cache; hence the name.
> - * We dequeue in fb_deferred_io_work() after flushing the
> - * page's content into video memory. Instead of lru, fbdefio
> - * should have it's own field.
> - */
> - if (!list_empty(&page->lru))
> - goto page_already_added;
> -
> - if (unlikely(fbdefio->sort_pagelist)) {
> - /*
> - * We loop through the pagelist before adding in order to
> - * keep the pagelist sorted. This has significant overhead
> - * of O(n^2) with n being the number of written pages. If
> - * possible, drivers should try to work with unsorted page
> - * lists instead.
> - */
> - struct page *cur;
> -
> - list_for_each_entry(cur, &fbdefio->pagelist, lru) {
> - if (cur->index > page->index)
> - break;
> - }
> - pos = &cur->lru;
> - }
> -
> - list_add_tail(&page->lru, pos);
> + lock_page(pageref->page);
>
> -page_already_added:
> mutex_unlock(&fbdefio->lock);
>
> /* come back after delay to process the deferred IO */
> schedule_delayed_work(&info->deferred_work, fbdefio->delay);
> return VM_FAULT_LOCKED;
> +
> +err_mutex_unlock:
> + mutex_unlock(&fbdefio->lock);
> + return ret;
> }
>
> static const struct vm_operations_struct fb_deferred_io_vm_ops = {
> @@ -186,15 +220,14 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_mmap);
> /* workqueue callback */
> static void fb_deferred_io_work(struct work_struct *work)
> {
> - struct fb_info *info = container_of(work, struct fb_info,
> - deferred_work.work);
> - struct list_head *node, *next;
> - struct page *cur;
> + struct fb_info *info = container_of(work, struct fb_info, deferred_work.work);
> + struct fb_deferred_io_pageref *pageref, *next;
> struct fb_deferred_io *fbdefio = info->fbdefio;
>
> /* here we mkclean the pages, then do all deferred IO */
> mutex_lock(&fbdefio->lock);
> - list_for_each_entry(cur, &fbdefio->pagelist, lru) {
> + list_for_each_entry(pageref, &fbdefio->pagelist, list) {
> + struct page *cur = pageref->page;
> lock_page(cur);
> page_mkclean(cur);
> unlock_page(cur);
> @@ -204,30 +237,48 @@ static void fb_deferred_io_work(struct work_struct *work)
> fbdefio->deferred_io(info, &fbdefio->pagelist);
>
> /* clear the list */
> - list_for_each_safe(node, next, &fbdefio->pagelist) {
> - list_del_init(node);
> - }
> + list_for_each_entry_safe(pageref, next, &fbdefio->pagelist, list)
> + fb_deferred_io_pageref_put(pageref, info);
> +
> mutex_unlock(&fbdefio->lock);
> }
>
> -void fb_deferred_io_init(struct fb_info *info)
> +int fb_deferred_io_init(struct fb_info *info)
> {
> struct fb_deferred_io *fbdefio = info->fbdefio;
> - struct page *page;
> - unsigned int i;
> + struct fb_deferred_io_pageref *pagerefs;
> + unsigned long npagerefs, i;
> + int ret;
>
> BUG_ON(!fbdefio);
> +
> + if (WARN_ON(!info->fix.smem_len))
> + return -EINVAL;
> +
> mutex_init(&fbdefio->lock);
> INIT_DELAYED_WORK(&info->deferred_work, fb_deferred_io_work);
> INIT_LIST_HEAD(&fbdefio->pagelist);
> if (fbdefio->delay == 0) /* set a default of 1 s */
> fbdefio->delay = HZ;
>
> - /* initialize all the page lists one time */
> - for (i = 0; i < info->fix.smem_len; i += PAGE_SIZE) {
> - page = fb_deferred_io_page(info, i);
> - INIT_LIST_HEAD(&page->lru);
> + npagerefs = DIV_ROUND_UP(info->fix.smem_len, PAGE_SIZE);
> +
> + /* alloc a page ref for each page of the display memory */
> + pagerefs = kvcalloc(npagerefs, sizeof(*pagerefs), GFP_KERNEL);
> + if (!pagerefs) {
> + ret = -ENOMEM;
> + goto err;
> }
> + for (i = 0; i < npagerefs; ++i)
> + INIT_LIST_HEAD(&pagerefs[i].list);
> + info->npagerefs = npagerefs;
> + info->pagerefs = pagerefs;
> +
> + return 0;
> +
> +err:
> + mutex_destroy(&fbdefio->lock);
> + return ret;
> }
> EXPORT_SYMBOL_GPL(fb_deferred_io_init);
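With the new int return I assume the caller - do_register_framebuffer(),
presumably - grows something like:

	ret = fb_deferred_io_init(info);
	if (ret)
		return ret;

so a failed pageref allocation fails registration cleanly instead of
blowing up later.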
>
> @@ -254,6 +305,7 @@ void fb_deferred_io_cleanup(struct fb_info *info)
> page->mapping = NULL;
> }
>
> + kvfree(info->pagerefs);
> mutex_destroy(&fbdefio->lock);
> }
> EXPORT_SYMBOL_GPL(fb_deferred_io_cleanup);
> diff --git a/include/linux/fb.h b/include/linux/fb.h
> index f95da1af9ff6..a332590c0fae 100644
> --- a/include/linux/fb.h
> +++ b/include/linux/fb.h
> @@ -201,6 +201,13 @@ struct fb_pixmap {
> };
>
> #ifdef CONFIG_FB_DEFERRED_IO
> +struct fb_deferred_io_pageref {
> + struct page *page;
> + unsigned long offset;
> + /* private */
> + struct list_head list;
This is the entry into the list of dirty pagerefs - so this could be
named to match the pagereflist suggestion above...
> +};
> +
> struct fb_deferred_io {
> /* delay between mkwrite and deferred handler */
> unsigned long delay;
> @@ -468,6 +475,8 @@ struct fb_info {
> #endif
> #ifdef CONFIG_FB_DEFERRED_IO
> struct delayed_work deferred_work;
> + unsigned long npagerefs;
> + struct fb_deferred_io_pageref *pagerefs;
> struct fb_deferred_io *fbdefio;
> #endif
>
> @@ -661,7 +670,7 @@ static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch,
>
> /* drivers/video/fb_defio.c */
> int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma);
> -extern void fb_deferred_io_init(struct fb_info *info);
> +extern int fb_deferred_io_init(struct fb_info *info);
> extern void fb_deferred_io_open(struct fb_info *info,
> struct inode *inode,
> struct file *file);
> --
> 2.36.0