[Intel-gfx] [PATCH 22/29] drm/i915: Handle stolen objects in pwrite
Daniel Vetter
daniel at ffwll.ch
Mon Aug 20 21:56:08 CEST 2012
On Sat, Aug 11, 2012 at 03:41:21PM +0100, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
What about putting kmap/unmap abstractions into obj->ops (like the dma_buf
interface already has)? Since the pwrite/pread code is already rather
branch-heavy, I hope we wouldn't see the overhead of the indirect call even
in microbenchmarks (I haven't checked). And this way we would also neatly
wrap up dma_bufs for pwrite (if anyone ever really wants that ...).
The kmap(_atomic) for stolen-mem backed objects would boil down to the
pointer arithmetic, and kunmap would just be a no-op.
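
Roughly what I have in mind, just as a sketch (not compile-tested, the hook
names and the exact ops layout are made up, only stolen_base/obj->stolen->start
are taken from your patch):

struct drm_i915_gem_object_ops {
	int (*get_pages)(struct drm_i915_gem_object *obj);
	void (*put_pages)(struct drm_i915_gem_object *obj);

	/* hypothetical additions for pread/pwrite */
	void *(*kmap)(struct drm_i915_gem_object *obj, int page, bool atomic);
	void (*kunmap)(struct drm_i915_gem_object *obj, void *vaddr, bool atomic);
};

/* Stolen objects: the "map" is just pointer arithmetic into the stolen
 * range, which is always visible to the kernel, and unmap is a no-op. */
static void *i915_gem_object_stolen_kmap(struct drm_i915_gem_object *obj,
					 int page, bool atomic)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;

	return (void *)(dev_priv->mm.stolen_base +
			obj->stolen->start + page * PAGE_SIZE);
}

static void i915_gem_object_stolen_kunmap(struct drm_i915_gem_object *obj,
					  void *vaddr, bool atomic)
{
	/* nothing to do, the stolen range stays mapped */
}

The shmem variant would simply wrap kmap_atomic/kmap like the current
fast/slow paths do, and the pwrite loop could then call obj->ops->kmap()
and ->kunmap() unconditionally instead of branching on obj->stolen.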
Cheers, Daniel
> ---
> drivers/gpu/drm/i915/i915_gem.c | 169 +++++++++++++++++++++++++--------------
> 1 file changed, 111 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 552f95b..a2fb2aa 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -664,19 +664,17 @@ out:
> * needs_clflush_before is set and flushes out any written cachelines after
> * writing if needs_clflush is set. */
> static int
> -shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
> +shmem_pwrite_fast(char *vaddr, int shmem_page_offset, int page_length,
> char __user *user_data,
> bool page_do_bit17_swizzling,
> bool needs_clflush_before,
> bool needs_clflush_after)
> {
> - char *vaddr;
> int ret;
>
> if (unlikely(page_do_bit17_swizzling))
> return -EINVAL;
>
> - vaddr = kmap_atomic(page);
> if (needs_clflush_before)
> drm_clflush_virt_range(vaddr + shmem_page_offset,
> page_length);
> @@ -686,7 +684,6 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
> if (needs_clflush_after)
> drm_clflush_virt_range(vaddr + shmem_page_offset,
> page_length);
> - kunmap_atomic(vaddr);
>
> return ret ? -EFAULT : 0;
> }
> @@ -694,16 +691,14 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
> /* Only difference to the fast-path function is that this can handle bit17
> * and uses non-atomic copy and kmap functions. */
> static int
> -shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
> +shmem_pwrite_slow(char *vaddr, int shmem_page_offset, int page_length,
> char __user *user_data,
> bool page_do_bit17_swizzling,
> bool needs_clflush_before,
> bool needs_clflush_after)
> {
> - char *vaddr;
> int ret;
>
> - vaddr = kmap(page);
> if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
> shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
> page_length,
> @@ -720,7 +715,6 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
> shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
> page_length,
> page_do_bit17_swizzling);
> - kunmap(page);
>
> return ret ? -EFAULT : 0;
> }
> @@ -731,6 +725,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> struct drm_i915_gem_pwrite *args,
> struct drm_file *file)
> {
> + struct drm_i915_private *dev_priv = dev->dev_private;
> ssize_t remain;
> loff_t offset;
> char __user *user_data;
> @@ -770,74 +765,132 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> if (ret)
> return ret;
>
> - i915_gem_object_pin_pages(obj);
> -
> offset = args->offset;
> obj->dirty = 1;
>
> - for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> - struct page *page;
> - int partial_cacheline_write;
> + if (obj->stolen) {
> + while (remain > 0) {
> + char *vaddr;
> + int partial_cacheline_write;
>
> - if (i < offset >> PAGE_SHIFT)
> - continue;
> + /* Operation in this page
> + *
> + * shmem_page_offset = offset within page in shmem file
> + * page_length = bytes to copy for this page
> + */
> + shmem_page_offset = offset_in_page(offset);
>
> - if (remain <= 0)
> - break;
> + page_length = remain;
> + if ((shmem_page_offset + page_length) > PAGE_SIZE)
> + page_length = PAGE_SIZE - shmem_page_offset;
>
> - /* Operation in this page
> - *
> - * shmem_page_offset = offset within page in shmem file
> - * page_length = bytes to copy for this page
> - */
> - shmem_page_offset = offset_in_page(offset);
> + /* If we don't overwrite a cacheline completely we need to be
> + * careful to have up-to-date data by first clflushing. Don't
> + * overcomplicate things and flush the entire patch. */
> + partial_cacheline_write = needs_clflush_before &&
> + ((shmem_page_offset | page_length)
> + & (boot_cpu_data.x86_clflush_size - 1));
>
> - page_length = remain;
> - if ((shmem_page_offset + page_length) > PAGE_SIZE)
> - page_length = PAGE_SIZE - shmem_page_offset;
> + vaddr = (char *)(dev_priv->mm.stolen_base + obj->stolen->start + offset);
> + page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> + ((uintptr_t)vaddr & (1 << 17)) != 0;
>
> - /* If we don't overwrite a cacheline completely we need to be
> - * careful to have up-to-date data by first clflushing. Don't
> - * overcomplicate things and flush the entire patch. */
> - partial_cacheline_write = needs_clflush_before &&
> - ((shmem_page_offset | page_length)
> - & (boot_cpu_data.x86_clflush_size - 1));
> + ret = shmem_pwrite_fast(vaddr, shmem_page_offset, page_length,
> + user_data, page_do_bit17_swizzling,
> + partial_cacheline_write,
> + needs_clflush_after);
>
> - page = sg_page(sg);
> - page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> - (page_to_phys(page) & (1 << 17)) != 0;
> + if (ret == 0)
> + goto next_stolen;
>
> - ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
> - user_data, page_do_bit17_swizzling,
> - partial_cacheline_write,
> - needs_clflush_after);
> - if (ret == 0)
> - goto next_page;
> + hit_slowpath = 1;
> + mutex_unlock(&dev->struct_mutex);
>
> - hit_slowpath = 1;
> - mutex_unlock(&dev->struct_mutex);
> - ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
> - user_data, page_do_bit17_swizzling,
> - partial_cacheline_write,
> - needs_clflush_after);
> + ret = shmem_pwrite_slow(vaddr, shmem_page_offset, page_length,
> + user_data, page_do_bit17_swizzling,
> + partial_cacheline_write,
> + needs_clflush_after);
>
> - mutex_lock(&dev->struct_mutex);
> + mutex_lock(&dev->struct_mutex);
> + if (ret)
> + goto out;
>
> -next_page:
> - set_page_dirty(page);
> - mark_page_accessed(page);
> +next_stolen:
> + remain -= page_length;
> + user_data += page_length;
> + offset += page_length;
> + }
> + } else {
> + i915_gem_object_pin_pages(obj);
>
> - if (ret)
> - goto out;
> + for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> + struct page *page;
> + char *vaddr;
> + int partial_cacheline_write;
>
> - remain -= page_length;
> - user_data += page_length;
> - offset += page_length;
> + if (i < offset >> PAGE_SHIFT)
> + continue;
> +
> + if (remain <= 0)
> + break;
> +
> + /* Operation in this page
> + *
> + * shmem_page_offset = offset within page in shmem file
> + * page_length = bytes to copy for this page
> + */
> + shmem_page_offset = offset_in_page(offset);
> +
> + page_length = remain;
> + if ((shmem_page_offset + page_length) > PAGE_SIZE)
> + page_length = PAGE_SIZE - shmem_page_offset;
> +
> + /* If we don't overwrite a cacheline completely we need to be
> + * careful to have up-to-date data by first clflushing. Don't
> + * overcomplicate things and flush the entire patch. */
> + partial_cacheline_write = needs_clflush_before &&
> + ((shmem_page_offset | page_length)
> + & (boot_cpu_data.x86_clflush_size - 1));
> +
> + page = sg_page(sg);
> + page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> + (page_to_phys(page) & (1 << 17)) != 0;
> +
> + vaddr = kmap_atomic(page);
> + ret = shmem_pwrite_fast(vaddr, shmem_page_offset, page_length,
> + user_data, page_do_bit17_swizzling,
> + partial_cacheline_write,
> + needs_clflush_after);
> +
> + kunmap_atomic(vaddr);
> +
> + if (ret == 0)
> + goto next_page;
> +
> + hit_slowpath = 1;
> + mutex_unlock(&dev->struct_mutex);
> +
> + vaddr = kmap(page);
> + ret = shmem_pwrite_slow(vaddr, shmem_page_offset, page_length,
> + user_data, page_do_bit17_swizzling,
> + partial_cacheline_write,
> + needs_clflush_after);
> + kunmap(page);
> +
> + mutex_lock(&dev->struct_mutex);
> + if (ret)
> + goto out_unpin;
> +
> +next_page:
> + remain -= page_length;
> + user_data += page_length;
> + offset += page_length;
> + }
> +out_unpin:
> + i915_gem_object_unpin_pages(obj);
> }
>
> out:
> - i915_gem_object_unpin_pages(obj);
> -
> if (hit_slowpath) {
> /* Fixup: Kill any reinstated backing storage pages */
> if (obj->madv == __I915_MADV_PURGED)
> --
> 1.7.10.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Mail: daniel at ffwll.ch
Mobile: +41 (0)79 365 57 48