[Intel-gfx] [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Oct 11 13:38:41 UTC 2018
On 23/07/2018 10:02, Chris Wilson wrote:
> Our attempt to account for bit17 swizzling of pread/pwrite onto tiled
> objects was flawed due to the simple fact that we do not always know the
> swizzling for a particular page (due to the swizzling varying based on
> location in certain unbalanced configurations). Furthermore, the
> pread/pwrite paths are now unbalanced in that we are required to use the
> GTT as in some cases we do not have direct CPU access to the backing
> physical pages (thus some paths trying to account for the swizzle, but
> others neglecting, chaos ensues).
>
> There are no known users who do use pread/pwrite into a tiled object
> (you need to manually detile anyway, so why now just use mmap and avoid
> the copy?) and no user bug reports to indicate that it is being used in
> the wild. As no one is hitting the buggy path, we can just remove the
> buggy code.
>
> References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 191 +++++---------------------------
> 1 file changed, 30 insertions(+), 161 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8b52cb768a67..73a69352e0d4 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -859,58 +859,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
> obj->write_domain = 0;
> }
>
> -static inline int
> -__copy_to_user_swizzled(char __user *cpu_vaddr,
> - const char *gpu_vaddr, int gpu_offset,
> - int length)
> -{
> - int ret, cpu_offset = 0;
> -
> - while (length > 0) {
> - int cacheline_end = ALIGN(gpu_offset + 1, 64);
> - int this_length = min(cacheline_end - gpu_offset, length);
> - int swizzled_gpu_offset = gpu_offset ^ 64;
> -
> - ret = __copy_to_user(cpu_vaddr + cpu_offset,
> - gpu_vaddr + swizzled_gpu_offset,
> - this_length);
> - if (ret)
> - return ret + length;
> -
> - cpu_offset += this_length;
> - gpu_offset += this_length;
> - length -= this_length;
> - }
> -
> - return 0;
> -}
> -
> -static inline int
> -__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
> - const char __user *cpu_vaddr,
> - int length)
> -{
> - int ret, cpu_offset = 0;
> -
> - while (length > 0) {
> - int cacheline_end = ALIGN(gpu_offset + 1, 64);
> - int this_length = min(cacheline_end - gpu_offset, length);
> - int swizzled_gpu_offset = gpu_offset ^ 64;
> -
> - ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
> - cpu_vaddr + cpu_offset,
> - this_length);
> - if (ret)
> - return ret + length;
> -
> - cpu_offset += this_length;
> - gpu_offset += this_length;
> - length -= this_length;
> - }
> -
> - return 0;
> -}
> -
> /*
> * Pins the specified object's pages and synchronizes the object with
> * GPU accesses. Sets needs_clflush to non-zero if the caller should
> @@ -1030,72 +978,29 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
> return ret;
> }
>
> -static void
> -shmem_clflush_swizzled_range(char *addr, unsigned long length,
> - bool swizzled)
> -{
> - if (unlikely(swizzled)) {
> - unsigned long start = (unsigned long) addr;
> - unsigned long end = (unsigned long) addr + length;
> -
> - /* For swizzling simply ensure that we always flush both
> - * channels. Lame, but simple and it works. Swizzled
> - * pwrite/pread is far from a hotpath - current userspace
> - * doesn't use it at all. */
> - start = round_down(start, 128);
> - end = round_up(end, 128);
> -
> - drm_clflush_virt_range((void *)start, end - start);
> - } else {
> - drm_clflush_virt_range(addr, length);
> - }
> -
> -}
> -
> -/* Only difference to the fast-path function is that this can handle bit17
> - * and uses non-atomic copy and kmap functions. */
> static int
> -shmem_pread_slow(struct page *page, int offset, int length,
> - char __user *user_data,
> - bool page_do_bit17_swizzling, bool needs_clflush)
> +shmem_pread(struct page *page, int offset, int len, char __user *user_data,
> + bool needs_clflush)
> {
> char *vaddr;
> int ret;
>
> - vaddr = kmap(page);
> - if (needs_clflush)
> - shmem_clflush_swizzled_range(vaddr + offset, length,
> - page_do_bit17_swizzling);
> -
> - if (page_do_bit17_swizzling)
> - ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
> - else
> - ret = __copy_to_user(user_data, vaddr + offset, length);
> - kunmap(page);
> + vaddr = kmap_atomic(page);
>
> - return ret ? - EFAULT : 0;
> -}
> + if (needs_clflush)
> + drm_clflush_virt_range(vaddr + offset, len);
>
> -static int
> -shmem_pread(struct page *page, int offset, int length, char __user *user_data,
> - bool page_do_bit17_swizzling, bool needs_clflush)
> -{
> - int ret;
> + ret = __copy_to_user_inatomic(user_data, vaddr + offset, len);
The kerneldoc for __copy_to_user_inatomic() says the userspace memory must be
pinned so that potential page faults are avoided. I don't see where our code
ensures that.
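For reference, this is roughly the pattern I read that kerneldoc to expect,
sketched outside the i915 code with an explicit pagefault_disable() pair
(illustration only, names made up, not a suggestion that this exact helper is
what we want here):

#include <linux/uaccess.h>

/*
 * Sketch only, not the i915 code: __copy_to_user_inatomic() expects the
 * caller to guarantee that no page fault can be taken, so that a fault
 * shows up as a short copy instead of a sleep in atomic context.
 */
static unsigned long
copy_to_user_nofault_sketch(void __user *dst, const void *src,
			    unsigned long len)
{
	unsigned long unwritten;

	pagefault_disable();
	unwritten = __copy_to_user_inatomic(dst, src, len);
	pagefault_enable();

	/* non-zero means the caller must retry with a faulting copy */
	return unwritten;
}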
>
> - ret = -ENODEV;
> - if (!page_do_bit17_swizzling) {
> - char *vaddr = kmap_atomic(page);
> + kunmap_atomic(vaddr);
>
> - if (needs_clflush)
> - drm_clflush_virt_range(vaddr + offset, length);
> - ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
> - kunmap_atomic(vaddr);
> + if (ret) {
> + vaddr = kmap(page);
> + ret = __copy_to_user(user_data, vaddr + offset, len);
> + kunmap(page);
> }
> - if (ret == 0)
> - return 0;
>
> - return shmem_pread_slow(page, offset, length, user_data,
> - page_do_bit17_swizzling, needs_clflush);
> + return ret ? - EFAULT : 0;
Why the space between - and EFAULT? Or why not just return ret?
> }
>
> static int
> @@ -1104,15 +1009,10 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
> {
> char __user *user_data;
> u64 remain;
> - unsigned int obj_do_bit17_swizzling;
> unsigned int needs_clflush;
> unsigned int idx, offset;
> int ret;
>
> - obj_do_bit17_swizzling = 0;
> - if (i915_gem_object_needs_bit17_swizzle(obj))
> - obj_do_bit17_swizzling = BIT(17);
> -
> ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
> if (ret)
> return ret;
> @@ -1134,7 +1034,6 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
> length = PAGE_SIZE - offset;
It's pre-existing, but there seems to be a potential overflow a few lines
above this, where the int length is assigned from the u64 remain.
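Just to illustrate the narrowing (a stand-alone userspace demo, names
hypothetical, nothing to do with the actual i915 variables beyond the types):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/*
	 * A 64-bit remaining byte count assigned to an int before the
	 * PAGE_SIZE clamp: anything above INT_MAX truncates silently
	 * (on the usual targets only the low 32 bits survive).
	 */
	uint64_t remain = (1ULL << 32) + 64;	/* user-supplied size */
	int length = remain;			/* narrows to 64 here */

	printf("remain=%llu length=%d\n",
	       (unsigned long long)remain, length);
	return 0;
}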
>
> ret = shmem_pread(page, offset, length, user_data,
> - page_to_phys(page) & obj_do_bit17_swizzling,
> needs_clflush);
> if (ret)
> break;
> @@ -1475,33 +1374,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
> return ret;
> }
>
> -static int
> -shmem_pwrite_slow(struct page *page, int offset, int length,
> - char __user *user_data,
> - bool page_do_bit17_swizzling,
> - bool needs_clflush_before,
> - bool needs_clflush_after)
> -{
> - char *vaddr;
> - int ret;
> -
> - vaddr = kmap(page);
> - if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
> - shmem_clflush_swizzled_range(vaddr + offset, length,
> - page_do_bit17_swizzling);
> - if (page_do_bit17_swizzling)
> - ret = __copy_from_user_swizzled(vaddr, offset, user_data,
> - length);
> - else
> - ret = __copy_from_user(vaddr + offset, user_data, length);
> - if (needs_clflush_after)
> - shmem_clflush_swizzled_range(vaddr + offset, length,
> - page_do_bit17_swizzling);
> - kunmap(page);
> -
> - return ret ? -EFAULT : 0;
> -}
> -
> /* Per-page copy function for the shmem pwrite fastpath.
> * Flushes invalid cachelines before writing to the target if
> * needs_clflush_before is set and flushes out any written cachelines after
> @@ -1509,31 +1381,34 @@ shmem_pwrite_slow(struct page *page, int offset, int length,
> */
> static int
> shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
> - bool page_do_bit17_swizzling,
> bool needs_clflush_before,
> bool needs_clflush_after)
> {
> + char *vaddr;
> int ret;
>
> - ret = -ENODEV;
> - if (!page_do_bit17_swizzling) {
> - char *vaddr = kmap_atomic(page);
> + vaddr = kmap_atomic(page);
>
> - if (needs_clflush_before)
> - drm_clflush_virt_range(vaddr + offset, len);
> - ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
> - if (needs_clflush_after)
> + if (needs_clflush_before)
> + drm_clflush_virt_range(vaddr + offset, len);
> +
> + ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
> + if (!ret && needs_clflush_after)
> + drm_clflush_virt_range(vaddr + offset, len);
> +
> + kunmap_atomic(vaddr);
> +
> + if (ret) {
> + vaddr = kmap(page);
> +
> + ret = __copy_from_user(vaddr + offset, user_data, len);
> + if (!ret && needs_clflush_after)
> drm_clflush_virt_range(vaddr + offset, len);
>
> - kunmap_atomic(vaddr);
> + kunmap(page);
> }
> - if (ret == 0)
> - return ret;
>
> - return shmem_pwrite_slow(page, offset, len, user_data,
> - page_do_bit17_swizzling,
> - needs_clflush_before,
> - needs_clflush_after);
> + return ret ? -EFAULT : 0;
> }
>
> static int
> @@ -1543,7 +1418,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
> struct drm_i915_private *i915 = to_i915(obj->base.dev);
> void __user *user_data;
> u64 remain;
> - unsigned int obj_do_bit17_swizzling;
> unsigned int partial_cacheline_write;
> unsigned int needs_clflush;
> unsigned int offset, idx;
> @@ -1558,10 +1432,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
> if (ret)
> return ret;
>
> - obj_do_bit17_swizzling = 0;
> - if (i915_gem_object_needs_bit17_swizzle(obj))
> - obj_do_bit17_swizzling = BIT(17);
> -
> /* If we don't overwrite a cacheline completely we need to be
> * careful to have up-to-date data by first clflushing. Don't
> * overcomplicate things and flush the entire patch.
> @@ -1582,7 +1452,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
> length = PAGE_SIZE - offset;
>
> ret = shmem_pwrite(page, offset, length, user_data,
> - page_to_phys(page) & obj_do_bit17_swizzling,
> (offset | length) & partial_cacheline_write,
> needs_clflush & CLFLUSH_AFTER);
> if (ret)
>
Basically the same comments as for the pread path apply.
Regards,
Tvrtko