[Intel-gfx] [PATCH] drm/i915: Batch copy_from_user for relocation processing
Daniel Vetter
daniel at ffwll.ch
Sat Mar 24 13:33:02 CET 2012
On Sat, Mar 24, 2012 at 12:20:24AM +0000, Chris Wilson wrote:
> Originally the code tried to allocate a large enough array to perform
> the copy using vmalloc, performance wasn't great and throughput was
> improved by processing each individual relocation entry separately.
> This too is not as efficient as one would desire. A compromise is then
> to allocate a single page (since that is relatively cheap) and process
> the relocations in page size batches.
>
> x11perf -copywinwin10: n450/pnv i3-330m i5-2520m (cpu)
> Before: 249000 785000 1280000 (80%)
> After: 264000 896000 1280000 (65%)
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 92 +++++++++++++++++++++++-----
> 1 files changed, 77 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 2d5d41b..1da04db 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -407,28 +407,90 @@ i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
> {
> struct drm_i915_gem_relocation_entry __user *user_relocs;
> struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
> - int i, ret;
> + int i, ret = 0;
> +
> + if (entry->relocation_count == 0)
> + return 0;
> +
> +#define N_RELOC_PER_PAGE \
> + (PAGE_SIZE / sizeof(struct drm_i915_gem_relocation_entry))
>
> user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
> - for (i = 0; i < entry->relocation_count; i++) {
> - struct drm_i915_gem_relocation_entry reloc;
>
> - if (__copy_from_user_inatomic(&reloc,
> - user_relocs+i,
> - sizeof(reloc)))
> - return -EFAULT;
> + if (entry->relocation_count < N_RELOC_PER_PAGE / 2) {
> +fallback:
We don't we go slightly more over board with optimiziting this?
drm_i915_gem_relocation_entry is on 32 bytes, so we can fit a few onto the
stack. Hence what about this before the loop?
drm_i915_gem_relocation_entry stack_arr[512/sizeof(reloc_entry)];
if (relocation_count > N_RELOC_PER_PAGE) {
tmp_stor = __get_free_page;
if (!tmp_stor)
goto fallback;
tmp_stor_size = N_RELOC_PER_PAGE;
} else {
fallback:
tmp_stor = stackk_arr;
tmp_stor_size = 512/sizeof(reloc_entry);
}
Then we also wouldn't have the not-so-beautiful duplication in code. Maybe
the added complexity of the page_alloc path isn't even worth it anymore.
Can I bother you to play around with this?
Cheers, Daniel
> + for (i = 0; i < entry->relocation_count; i++) {
> + struct drm_i915_gem_relocation_entry reloc;
> + u64 offset;
>
> - ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
> - if (ret)
> - return ret;
> + if (__copy_from_user_inatomic(&reloc,
> + user_relocs+i,
> + sizeof(reloc)))
> + return -EFAULT;
>
> - if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
> - &reloc.presumed_offset,
> - sizeof(reloc.presumed_offset)))
> - return -EFAULT;
> + offset = reloc.presumed_offset;
> +
> + ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
> + if (ret)
> + return ret;
> +
> + if (reloc.presumed_offset != offset &&
> + __copy_to_user_inatomic(&user_relocs[i].presumed_offset,
> + &reloc.presumed_offset,
> + sizeof(reloc.presumed_offset)))
> + return -EFAULT;
> + }
> + } else {
> + unsigned long page;
> + int remain;
> +
> + page = __get_free_page(GFP_TEMPORARY);
> + if (unlikely(page == 0))
> + goto fallback;
> +
> + i = 0;
> + remain = entry->relocation_count;
> + do {
> + struct drm_i915_gem_relocation_entry *r =
> + (struct drm_i915_gem_relocation_entry *)page;
> + int count = remain;
> + if (count > N_RELOC_PER_PAGE)
> + count = N_RELOC_PER_PAGE;
> + remain -= count;
> +
> + if (__copy_from_user_inatomic(r, user_relocs+i,
> + count*sizeof(r[0]))) {
> + ret = -EFAULT;
> + goto err;
> + }
> +
> + do {
> + u64 offset = r->presumed_offset;
> +
> + ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
> + if (ret)
> + goto err;
> +
> + if (r->presumed_offset != offset &&
> + __copy_to_user_inatomic(&user_relocs[i].presumed_offset,
> + &r->presumed_offset,
> + sizeof(r->presumed_offset))) {
> + ret = -EFAULT;
> + goto err;
> + }
> +
> + i++;
> + r++;
> + } while (--count);
> + } while (remain);
> +
> + ret = 0;
> +err:
> + free_page(page);
> }
>
> - return 0;
> + return ret;
> +#undef N_RELOC_PER_PAGE
> }
>
> static int
> --
> 1.7.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Mail: daniel at ffwll.ch
Mobile: +41 (0)79 365 57 48
More information about the Intel-gfx
mailing list