[Intel-gfx] [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite
Chris Wilson
chris at chris-wilson.co.uk
Thu Oct 11 14:30:13 UTC 2018
Quoting Tvrtko Ursulin (2018-10-11 14:38:41)
>
> On 23/07/2018 10:02, Chris Wilson wrote:
> > Our attempt to account for bit17 swizzling of pread/pwrite onto tiled
> > objects was flawed due to the simple fact that we do not always know the
> > swizzling for a particular page (due to the swizzling varying based on
> > location in certain unbalanced configurations). Furthermore, the
> > pread/pwrite paths are now unbalanced in that we are required to use the
> > GTT as in some cases we do not have direct CPU access to the backing
> > physical pages (thus some paths trying to account for the swizzle, but
> > others neglecting, chaos ensues).
> >
> > There are no known users who do use pread/pwrite into a tiled object
> > (you need to manually detile anyway, so why not just use mmap and avoid
> > the copy?) and no user bug reports to indicate that it is being used in
> > the wild. As no one is hitting the buggy path, we can just remove the
> > buggy code.
> >
> > References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_gem.c | 191 +++++---------------------------
> > 1 file changed, 30 insertions(+), 161 deletions(-)
> >
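For context on what the removed helpers below were doing: on machines where
i915_gem_object_needs_bit17_swizzle() is true, the old pread/pwrite code
flipped bit 6 of every offset (i.e. swapped adjacent 64-byte cachelines,
the gpu_offset ^ 64 below) for pages whose physical address has bit 17 set.
A sketch of the per-page decision the old callers made (condensed, not the
exact driver code):

static inline bool page_does_bit17_swizzle(struct drm_i915_gem_object *obj,
					   struct page *page)
{
	/* The swizzle depends on the page's physical address, which is
	 * why it cannot be known up front and can even change if the
	 * page is swapped out and back in at a different address.
	 */
	return i915_gem_object_needs_bit17_swizzle(obj) &&
	       (page_to_phys(page) & BIT(17));
}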
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 8b52cb768a67..73a69352e0d4 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -859,58 +859,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
> > obj->write_domain = 0;
> > }
> >
> > -static inline int
> > -__copy_to_user_swizzled(char __user *cpu_vaddr,
> > - const char *gpu_vaddr, int gpu_offset,
> > - int length)
> > -{
> > - int ret, cpu_offset = 0;
> > -
> > - while (length > 0) {
> > - int cacheline_end = ALIGN(gpu_offset + 1, 64);
> > - int this_length = min(cacheline_end - gpu_offset, length);
> > - int swizzled_gpu_offset = gpu_offset ^ 64;
> > -
> > - ret = __copy_to_user(cpu_vaddr + cpu_offset,
> > - gpu_vaddr + swizzled_gpu_offset,
> > - this_length);
> > - if (ret)
> > - return ret + length;
> > -
> > - cpu_offset += this_length;
> > - gpu_offset += this_length;
> > - length -= this_length;
> > - }
> > -
> > - return 0;
> > -}
> > -
> > -static inline int
> > -__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
> > - const char __user *cpu_vaddr,
> > - int length)
> > -{
> > - int ret, cpu_offset = 0;
> > -
> > - while (length > 0) {
> > - int cacheline_end = ALIGN(gpu_offset + 1, 64);
> > - int this_length = min(cacheline_end - gpu_offset, length);
> > - int swizzled_gpu_offset = gpu_offset ^ 64;
> > -
> > - ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
> > - cpu_vaddr + cpu_offset,
> > - this_length);
> > - if (ret)
> > - return ret + length;
> > -
> > - cpu_offset += this_length;
> > - gpu_offset += this_length;
> > - length -= this_length;
> > - }
> > -
> > - return 0;
> > -}
> > -
> > /*
> > * Pins the specified object's pages and synchronizes the object with
> > * GPU accesses. Sets needs_clflush to non-zero if the caller should
> > @@ -1030,72 +978,29 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
> > return ret;
> > }
> >
> > -static void
> > -shmem_clflush_swizzled_range(char *addr, unsigned long length,
> > - bool swizzled)
> > -{
> > - if (unlikely(swizzled)) {
> > - unsigned long start = (unsigned long) addr;
> > - unsigned long end = (unsigned long) addr + length;
> > -
> > - /* For swizzling simply ensure that we always flush both
> > - * channels. Lame, but simple and it works. Swizzled
> > - * pwrite/pread is far from a hotpath - current userspace
> > - * doesn't use it at all. */
> > - start = round_down(start, 128);
> > - end = round_up(end, 128);
> > -
> > - drm_clflush_virt_range((void *)start, end - start);
> > - } else {
> > - drm_clflush_virt_range(addr, length);
> > - }
> > -
> > -}
> > -
> > -/* Only difference to the fast-path function is that this can handle bit17
> > - * and uses non-atomic copy and kmap functions. */
> > static int
> > -shmem_pread_slow(struct page *page, int offset, int length,
> > - char __user *user_data,
> > - bool page_do_bit17_swizzling, bool needs_clflush)
> > +shmem_pread(struct page *page, int offset, int len, char __user *user_data,
> > + bool needs_clflush)
> > {
> > char *vaddr;
> > int ret;
> >
> > - vaddr = kmap(page);
> > - if (needs_clflush)
> > - shmem_clflush_swizzled_range(vaddr + offset, length,
> > - page_do_bit17_swizzling);
> > -
> > - if (page_do_bit17_swizzling)
> > - ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
> > - else
> > - ret = __copy_to_user(user_data, vaddr + offset, length);
> > - kunmap(page);
> > + vaddr = kmap_atomic(page);
> >
> > - return ret ? - EFAULT : 0;
> > -}
> > + if (needs_clflush)
> > + drm_clflush_virt_range(vaddr + offset, len);
> >
> > -static int
> > -shmem_pread(struct page *page, int offset, int length, char __user *user_data,
> > - bool page_do_bit17_swizzling, bool needs_clflush)
> > -{
> > - int ret;
> > + ret = __copy_to_user_inatomic(user_data, vaddr + offset, len);
>
> Kerneldoc says userspace memory must be pinned so potential page faults
> are avoided. I don't see that our code does that.
kmap_atomic() once upon a time gave assurances that _inatomic was safe.
It's not important; kmap() is good enough. (Once upon a time there was a
major difference for kmap_atomic() as you had to use preallocated slots,
the good old days ;)
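(For reference, the idiom the new shmem_pread() uses, condensed into a
sketch rather than the exact driver code: try the cheap atomic mapping
first, and only fall back to a sleeping kmap() if the user copy faults.)

	vaddr = kmap_atomic(page);	/* no sleeping allowed here */
	unwritten = __copy_to_user_inatomic(user_data, vaddr + offset, len);
	kunmap_atomic(vaddr);
	if (unwritten) {
		/* __copy_to_user_inatomic() returns the number of bytes
		 * it could not copy; retry under kmap(), which may sleep,
		 * so the page fault can actually be serviced.
		 */
		vaddr = kmap(page);
		unwritten = __copy_to_user(user_data, vaddr + offset, len);
		kunmap(page);
	}
	return unwritten ? -EFAULT : 0;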
> > - ret = -ENODEV;
> > - if (!page_do_bit17_swizzling) {
> > - char *vaddr = kmap_atomic(page);
> > + kunmap_atomic(vaddr);
> >
> > - if (needs_clflush)
> > - drm_clflush_virt_range(vaddr + offset, length);
> > - ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
> > - kunmap_atomic(vaddr);
> > + if (ret) {
> > + vaddr = kmap(page);
> > + ret = __copy_to_user(user_data, vaddr + offset, len);
> > + kunmap(page);
> > }
> > - if (ret == 0)
> > - return 0;
> >
> > - return shmem_pread_slow(page, offset, length, user_data,
> > - page_do_bit17_swizzling, needs_clflush);
> > + return ret ? - EFAULT : 0;
>
> Why the space between - and EFAULT? Or why not just return ret?
Fat fingers. (Though we do want -EFAULT rather than plain ret here:
__copy_to_user() returns the number of bytes left uncopied, not an errno.)
> > }
> >
> > static int
> > @@ -1104,15 +1009,10 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
> > {
> > char __user *user_data;
> > u64 remain;
> > - unsigned int obj_do_bit17_swizzling;
> > unsigned int needs_clflush;
> > unsigned int idx, offset;
> > int ret;
> >
> > - obj_do_bit17_swizzling = 0;
> > - if (i915_gem_object_needs_bit17_swizzle(obj))
> > - obj_do_bit17_swizzling = BIT(17);
> > -
> > ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
> > if (ret)
> > return ret;
> > @@ -1134,7 +1034,6 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
> > length = PAGE_SIZE - offset;
>
> It's pre-existing, but there seems to be a potential overflow in the
> "int length = remain" assignment a few lines above this, since remain
> is u64.
Hmm. If so, that's one that escaped my perusal. Time for sanity checks
probably.
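Something along these lines would close it (an untested sketch, not from
the posted patch):

	unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

min_t(u64, ...) keeps the comparison in 64 bits before narrowing, so a
remain larger than 4GiB can no longer truncate into a small (or negative)
int.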
-Chris