[Intel-gfx] [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Oct 11 14:44:19 UTC 2018
On 11/10/2018 15:30, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-10-11 14:38:41)
>>
>> On 23/07/2018 10:02, Chris Wilson wrote:
>>> Our attempt to account for bit17 swizzling of pread/pwrite onto tiled
>>> objects was flawed due to the simple fact that we do not always know the
>>> swizzling for a particular page (due to the swizzling varying based on
>>> location in certain unbalanced configurations). Furthermore, the
>>> pread/pwrite paths are now unbalanced in that we are required to use the
>>> GTT as in some cases we do not have direct CPU access to the backing
>>> physical pages (thus some paths trying to account for the swizzle, but
>>> others neglecting, chaos ensues).
>>>
>>> There are no known users who do use pread/pwrite into a tiled object
>>> (you need to manually detile anyway, so why not just use mmap and avoid
>>> the copy?) and no user bug reports to indicate that it is being used in
>>> the wild. As no one is hitting the buggy path, we can just remove the
>>> buggy code.
>>>
>>> References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")
>>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>>> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_gem.c | 191 +++++---------------------------
>>> 1 file changed, 30 insertions(+), 161 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>> index 8b52cb768a67..73a69352e0d4 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>> @@ -859,58 +859,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
>>> obj->write_domain = 0;
>>> }
>>>
>>> -static inline int
>>> -__copy_to_user_swizzled(char __user *cpu_vaddr,
>>> - const char *gpu_vaddr, int gpu_offset,
>>> - int length)
>>> -{
>>> - int ret, cpu_offset = 0;
>>> -
>>> - while (length > 0) {
>>> - int cacheline_end = ALIGN(gpu_offset + 1, 64);
>>> - int this_length = min(cacheline_end - gpu_offset, length);
>>> - int swizzled_gpu_offset = gpu_offset ^ 64;
>>> -
>>> - ret = __copy_to_user(cpu_vaddr + cpu_offset,
>>> - gpu_vaddr + swizzled_gpu_offset,
>>> - this_length);
>>> - if (ret)
>>> - return ret + length;
>>> -
>>> - cpu_offset += this_length;
>>> - gpu_offset += this_length;
>>> - length -= this_length;
>>> - }
>>> -
>>> - return 0;
>>> -}
>>> -
>>> -static inline int
>>> -__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
>>> - const char __user *cpu_vaddr,
>>> - int length)
>>> -{
>>> - int ret, cpu_offset = 0;
>>> -
>>> - while (length > 0) {
>>> - int cacheline_end = ALIGN(gpu_offset + 1, 64);
>>> - int this_length = min(cacheline_end - gpu_offset, length);
>>> - int swizzled_gpu_offset = gpu_offset ^ 64;
>>> -
>>> - ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
>>> - cpu_vaddr + cpu_offset,
>>> - this_length);
>>> - if (ret)
>>> - return ret + length;
>>> -
>>> - cpu_offset += this_length;
>>> - gpu_offset += this_length;
>>> - length -= this_length;
>>> - }
>>> -
>>> - return 0;
>>> -}
>>> -
>>> /*
>>> * Pins the specified object's pages and synchronizes the object with
>>> * GPU accesses. Sets needs_clflush to non-zero if the caller should
>>> @@ -1030,72 +978,29 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
>>> return ret;
>>> }
>>>
>>> -static void
>>> -shmem_clflush_swizzled_range(char *addr, unsigned long length,
>>> - bool swizzled)
>>> -{
>>> - if (unlikely(swizzled)) {
>>> - unsigned long start = (unsigned long) addr;
>>> - unsigned long end = (unsigned long) addr + length;
>>> -
>>> - /* For swizzling simply ensure that we always flush both
>>> - * channels. Lame, but simple and it works. Swizzled
>>> - * pwrite/pread is far from a hotpath - current userspace
>>> - * doesn't use it at all. */
>>> - start = round_down(start, 128);
>>> - end = round_up(end, 128);
>>> -
>>> - drm_clflush_virt_range((void *)start, end - start);
>>> - } else {
>>> - drm_clflush_virt_range(addr, length);
>>> - }
>>> -
>>> -}
>>> -
>>> -/* Only difference to the fast-path function is that this can handle bit17
>>> - * and uses non-atomic copy and kmap functions. */
>>> static int
>>> -shmem_pread_slow(struct page *page, int offset, int length,
>>> - char __user *user_data,
>>> - bool page_do_bit17_swizzling, bool needs_clflush)
>>> +shmem_pread(struct page *page, int offset, int len, char __user *user_data,
>>> + bool needs_clflush)
>>> {
>>> char *vaddr;
>>> int ret;
>>>
>>> - vaddr = kmap(page);
>>> - if (needs_clflush)
>>> - shmem_clflush_swizzled_range(vaddr + offset, length,
>>> - page_do_bit17_swizzling);
>>> -
>>> - if (page_do_bit17_swizzling)
>>> - ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
>>> - else
>>> - ret = __copy_to_user(user_data, vaddr + offset, length);
>>> - kunmap(page);
>>> + vaddr = kmap_atomic(page);
>>>
>>> - return ret ? - EFAULT : 0;
>>> -}
>>> + if (needs_clflush)
>>> + drm_clflush_virt_range(vaddr + offset, len);
>>>
>>> -static int
>>> -shmem_pread(struct page *page, int offset, int length, char __user *user_data,
>>> - bool page_do_bit17_swizzling, bool needs_clflush)
>>> -{
>>> - int ret;
>>> + ret = __copy_to_user_inatomic(user_data, vaddr + offset, len);
>>
>> The kerneldoc for __copy_to_user_inatomic says the userspace memory must be
>> pinned so potential page faults are avoided. I don't see that our code does
>> that.
>
> kmap_atomic once upon a time gave assurances that _inatomic was safe.
> It's not important, kmap() is good enough (once upon a time there was a
> major difference for kmap_atomic as you had to use preallocated slots,
> the good old days ;)
kmap_atomic only covers one end of the copy though; user_data is the other
end, which also mustn't fault. But never mind, a kmap-only path indeed sounds
fine.
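For illustration, a minimal sketch of what that kmap-only variant could look
like, keeping the helper signature from this patch (a sketch under that
assumption, not the final code):

  /* Sketch only: kmap() can sleep, so the plain __copy_to_user() below is
   * free to fault in the user pages; no prefaulting and no atomic fallback
   * are needed. Assumes <linux/highmem.h>, <linux/uaccess.h> and
   * <drm/drm_cache.h>.
   */
  static int
  shmem_pread(struct page *page, int offset, int len, char __user *user_data,
              bool needs_clflush)
  {
          char *vaddr;
          int ret;

          vaddr = kmap(page);
          if (needs_clflush)
                  drm_clflush_virt_range(vaddr + offset, len);

          /* __copy_to_user() returns the number of bytes left uncopied. */
          ret = __copy_to_user(user_data, vaddr + offset, len);
          kunmap(page);

          return ret ? -EFAULT : 0;
  }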
>
>>> - ret = -ENODEV;
>>> - if (!page_do_bit17_swizzling) {
>>> - char *vaddr = kmap_atomic(page);
>>> + kunmap_atomic(vaddr);
>>>
>>> - if (needs_clflush)
>>> - drm_clflush_virt_range(vaddr + offset, length);
>>> - ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
>>> - kunmap_atomic(vaddr);
>>> + if (ret) {
>>> + vaddr = kmap(page);
>>> + ret = __copy_to_user(user_data, vaddr + offset, len);
>>> + kunmap(page);
>>> }
>>> - if (ret == 0)
>>> - return 0;
>>>
>>> - return shmem_pread_slow(page, offset, length, user_data,
>>> - page_do_bit17_swizzling, needs_clflush);
>>> + return ret ? - EFAULT : 0;
>>
>> Why the space between - and EFAULT? Or why not just return ret?
>
> Fat fingers.
>
>>> }
>>>
>>> static int
>>> @@ -1104,15 +1009,10 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
>>> {
>>> char __user *user_data;
>>> u64 remain;
>>> - unsigned int obj_do_bit17_swizzling;
>>> unsigned int needs_clflush;
>>> unsigned int idx, offset;
>>> int ret;
>>>
>>> - obj_do_bit17_swizzling = 0;
>>> - if (i915_gem_object_needs_bit17_swizzle(obj))
>>> - obj_do_bit17_swizzling = BIT(17);
>>> -
>>> ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
>>> if (ret)
>>> return ret;
>>> @@ -1134,7 +1034,6 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
>>> length = PAGE_SIZE - offset;
>>
>> It's pre-existing, but there seems to be a potential overflow a few lines
>> above this, where the u64 remain is assigned to the int length.
>
> Hmm. If so, that's one that escaped my perusal. Time for sanity checks
> probably.
Feel free to leave it for a separate patch/series, it was just an
observation. Or even see if you can delegate it. :)
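For reference, a minimal sketch of the kind of clamp I had in mind, with the
variable and field names assumed from the diff context rather than copied
from the actual code:

  /* Sketch only: bound the u64 remainder to the current page before
   * narrowing it, so a very large pread cannot truncate when stored in
   * an int. min_t() is from <linux/kernel.h>.
   */
  u64 remain = args->size;
  unsigned int offset = offset_in_page(args->offset);
  unsigned long idx;
  int ret = 0;

  for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
          unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

          ret = shmem_pread(i915_gem_object_get_page(obj, idx),
                            offset, length, user_data, needs_clflush);
          if (ret)
                  break;

          remain -= length;
          user_data += length;
          offset = 0;
  }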
Regards,
Tvrtko