[Intel-gfx] [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite
Chris Wilson
chris at chris-wilson.co.uk
Thu Oct 11 14:30:13 UTC 2018
Quoting Tvrtko Ursulin (2018-10-11 14:38:41)
>
> On 23/07/2018 10:02, Chris Wilson wrote:
> > Our attempt to account for bit17 swizzling of pread/pwrite onto tiled
> > objects was flawed due to the simple fact that we do not always know the
> > swizzling for a particular page (due to the swizzling varying based on
> > location in certain unbalanced configurations). Furthermore, the
> > pread/pwrite paths are now unbalanced in that we are required to use the
> > GTT as in some cases we do not have direct CPU access to the backing
> > physical pages (thus some paths trying to account for the swizzle, but
> > others neglecting, chaos ensues).
> >
> > There are no known users who do use pread/pwrite into a tiled object
> > (you need to manually detile anyway, so why not just use mmap and avoid
> > the copy?) and no user bug reports to indicate that it is being used in
> > the wild. As no one is hitting the buggy path, we can just remove the
> > buggy code.
> >
> > References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_gem.c | 191 +++++---------------------------
> > 1 file changed, 30 insertions(+), 161 deletions(-)
> >
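For context on what the removed helpers below were doing: on machines where
i915_gem_object_needs_bit17_swizzle() is true, the old pread/pwrite code
flipped bit 6 of every offset (i.e. swapped adjacent 64-byte cachelines,
the gpu_offset ^ 64 below) for pages whose physical address has bit 17 set.
A sketch of the per-page decision the old callers made (condensed, not the
exact driver code):

static inline bool page_does_bit17_swizzle(struct drm_i915_gem_object *obj,
					   struct page *page)
{
	/* The swizzle depends on the page's physical address, which is
	 * why it cannot be known up front and can even change if the
	 * page is swapped out and back in at a different address.
	 */
	return i915_gem_object_needs_bit17_swizzle(obj) &&
	       (page_to_phys(page) & BIT(17));
}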
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 8b52cb768a67..73a69352e0d4 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -859,58 +859,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
> > obj->write_domain = 0;
> > }
> >
> > -static inline int
> > -__copy_to_user_swizzled(char __user *cpu_vaddr,
> > - const char *gpu_vaddr, int gpu_offset,
> > - int length)
> > -{
> > - int ret, cpu_offset = 0;
> > -
> > - while (length > 0) {
> > - int cacheline_end = ALIGN(gpu_offset + 1, 64);
> > - int this_length = min(cacheline_end - gpu_offset, length);
> > - int swizzled_gpu_offset = gpu_offset ^ 64;
> > -
> > - ret = __copy_to_user(cpu_vaddr + cpu_offset,
> > - gpu_vaddr + swizzled_gpu_offset,
> > - this_length);
> > - if (ret)
> > - return ret + length;
> > -
> > - cpu_offset += this_length;
> > - gpu_offset += this_length;
> > - length -= this_length;
> > - }
> > -
> > - return 0;
> > -}
> > -
> > -static inline int
> > -__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
> > - const char __user *cpu_vaddr,
> > - int length)
> > -{
> > - int ret, cpu_offset = 0;
> > -
> > - while (length > 0) {
> > - int cacheline_end = ALIGN(gpu_offset + 1, 64);
> > - int this_length = min(cacheline_end - gpu_offset, length);
> > - int swizzled_gpu_offset = gpu_offset ^ 64;
> > -
> > - ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
> > - cpu_vaddr + cpu_offset,
> > - this_length);
> > - if (ret)
> > - return ret + length;
> > -
> > - cpu_offset += this_length;
> > - gpu_offset += this_length;
> > - length -= this_length;
> > - }
> > -
> > - return 0;
> > -}
> > -
> > /*
> > * Pins the specified object's pages and synchronizes the object with
> > * GPU accesses. Sets needs_clflush to non-zero if the caller should
> > @@ -1030,72 +978,29 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
> > return ret;
> > }
> >
> > -static void
> > -shmem_clflush_swizzled_range(char *addr, unsigned long length,
> > - bool swizzled)
> > -{
> > - if (unlikely(swizzled)) {
> > - unsigned long start = (unsigned long) addr;
> > - unsigned long end = (unsigned long) addr + length;
> > -
> > - /* For swizzling simply ensure that we always flush both
> > - * channels. Lame, but simple and it works. Swizzled
> > - * pwrite/pread is far from a hotpath - current userspace
> > - * doesn't use it at all. */
> > - start = round_down(start, 128);
> > - end = round_up(end, 128);
> > -
> > - drm_clflush_virt_range((void *)start, end - start);
> > - } else {
> > - drm_clflush_virt_range(addr, length);
> > - }
> > -
> > -}
> > -
> > -/* Only difference to the fast-path function is that this can handle bit17
> > - * and uses non-atomic copy and kmap functions. */
> > static int
> > -shmem_pread_slow(struct page *page, int offset, int length,
> > - char __user *user_data,
> > - bool page_do_bit17_swizzling, bool needs_clflush)
> > +shmem_pread(struct page *page, int offset, int len, char __user *user_data,
> > + bool needs_clflush)
> > {
> > char *vaddr;
> > int ret;
> >
> > - vaddr = kmap(page);
> > - if (needs_clflush)
> > - shmem_clflush_swizzled_range(vaddr + offset, length,
> > - page_do_bit17_swizzling);
> > -
> > - if (page_do_bit17_swizzling)
> > - ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
> > - else
> > - ret = __copy_to_user(user_data, vaddr + offset, length);
> > - kunmap(page);
> > + vaddr = kmap_atomic(page);
> >
> > - return ret ? - EFAULT : 0;
> > -}
> > + if (needs_clflush)
> > + drm_clflush_virt_range(vaddr + offset, len);
> >
> > -static int
> > -shmem_pread(struct page *page, int offset, int length, char __user *user_data,
> > - bool page_do_bit17_swizzling, bool needs_clflush)
> > -{
> > - int ret;
> > + ret = __copy_to_user_inatomic(user_data, vaddr + offset, len);
>
> Kerneldoc says userspace memory must be pinned so potential page faults
> are avoided. I don't see that our code does that.
kmap_atomic() once upon a time gave assurances that _inatomic was safe.
It's not important; kmap() is good enough. (Once upon a time there was a
major difference for kmap_atomic() as you had to use preallocated slots,
the good old days ;)
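(For reference, the idiom the new shmem_pread() uses, condensed into a
sketch rather than the exact driver code: try the cheap atomic mapping
first, and only fall back to a sleeping kmap() if the user copy faults.)

	vaddr = kmap_atomic(page);	/* no sleeping allowed here */
	unwritten = __copy_to_user_inatomic(user_data, vaddr + offset, len);
	kunmap_atomic(vaddr);
	if (unwritten) {
		/* __copy_to_user_inatomic() returns the number of bytes
		 * it could not copy; retry under kmap(), which may sleep,
		 * so the page fault can actually be serviced.
		 */
		vaddr = kmap(page);
		unwritten = __copy_to_user(user_data, vaddr + offset, len);
		kunmap(page);
	}
	return unwritten ? -EFAULT : 0;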
> > - ret = -ENODEV;
> > - if (!page_do_bit17_swizzling) {
> > - char *vaddr = kmap_atomic(page);
> > + kunmap_atomic(vaddr);
> >
> > - if (needs_clflush)
> > - drm_clflush_virt_range(vaddr + offset, length);
> > - ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
> > - kunmap_atomic(vaddr);
> > + if (ret) {
> > + vaddr = kmap(page);
> > + ret = __copy_to_user(user_data, vaddr + offset, len);
> > + kunmap(page);
> > }
> > - if (ret == 0)
> > - return 0;
> >
> > - return shmem_pread_slow(page, offset, length, user_data,
> > - page_do_bit17_swizzling, needs_clflush);
> > + return ret ? - EFAULT : 0;
>
> Why the space between - and EFAULT? Or why not just return ret?
Fat fingers. (Though we do want -EFAULT rather than plain ret here:
__copy_to_user() returns the number of bytes left uncopied, not an errno.)
> > }
> >
> > static int
> > @@ -1104,15 +1009,10 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
> > {
> > char __user *user_data;
> > u64 remain;
> > - unsigned int obj_do_bit17_swizzling;
> > unsigned int needs_clflush;
> > unsigned int idx, offset;
> > int ret;
> >
> > - obj_do_bit17_swizzling = 0;
> > - if (i915_gem_object_needs_bit17_swizzle(obj))
> > - obj_do_bit17_swizzling = BIT(17);
> > -
> > ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
> > if (ret)
> > return ret;
> > @@ -1134,7 +1034,6 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
> > length = PAGE_SIZE - offset;
>
> It's pre-existing, but there seems to be a potential overflow in the
> "int length = remain" assignment a few lines above this, since remain
> is u64.
Hmm. If so, that's one that escaped my perusal. Time for sanity checks
probably.
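Something along these lines would close it (an untested sketch, not from
the posted patch):

	unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

min_t(u64, ...) keeps the comparison in 64 bits before narrowing, so a
remain larger than 4GiB can no longer truncate into a small (or negative)
int.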
-Chris