[igt-dev] [PATCH i-g-t v23 37/37] tests/gem_linear_blits: Use intel allocator
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Tue Mar 16 13:08:54 UTC 2021
On Tue, Mar 16, 2021 at 11:23:25AM +0000, Chris Wilson wrote:
> Quoting Zbigniew Kempczyński (2021-03-15 16:58:42)
> > From: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> >
> > Use intel allocator directly, without intel-bb infrastructure.
> >
> > Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> > Cc: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> > Cc: Chris Wilson <chris at chris-wilson.co.uk>
> > ---
> > tests/i915/gem_linear_blits.c | 120 ++++++++++++++++++++++++----------
> > 1 file changed, 84 insertions(+), 36 deletions(-)
> >
> > diff --git a/tests/i915/gem_linear_blits.c b/tests/i915/gem_linear_blits.c
> > index cae42d52a..b85fd9fef 100644
> > --- a/tests/i915/gem_linear_blits.c
> > +++ b/tests/i915/gem_linear_blits.c
> > @@ -53,10 +53,13 @@ IGT_TEST_DESCRIPTION("Test doing many blits with a working set larger than the"
> > #define WIDTH 512
> > #define HEIGHT 512
> >
> > +/* We don't have alignment detection yet, so assume worst case scenario */
> > +#define ALIGNMENT (2048*1024)
> > +
> > static uint32_t linear[WIDTH*HEIGHT];
> >
> > -static void
> > -copy(int fd, uint32_t dst, uint32_t src)
> > +static void copy(int fd, uint64_t ahnd, uint32_t dst, uint32_t src,
> > + uint64_t dst_offset, uint64_t src_offset, bool do_relocs)
> > {
> > uint32_t batch[12];
> > struct drm_i915_gem_relocation_entry reloc[2];
> > @@ -64,6 +67,23 @@ copy(int fd, uint32_t dst, uint32_t src)
> > struct drm_i915_gem_execbuffer2 exec;
> > int i = 0;
> >
> > + memset(obj, 0, sizeof(obj));
> > + obj[0].handle = dst;
> > + obj[0].offset = CANONICAL(dst_offset);
> > + obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE |
> > + EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> > + obj[1].handle = src;
> > + obj[1].offset = CANONICAL(src_offset);
> > + obj[1].flags = EXEC_OBJECT_PINNED |
> > + EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> > +
> > + obj[2].handle = gem_create(fd, 4096);
> > + obj[2].offset = intel_allocator_alloc(ahnd, obj[2].handle,
> > + 4096, ALIGNMENT);
> > + obj[2].offset = CANONICAL(obj[2].offset);
> > + obj[2].flags = EXEC_OBJECT_PINNED |
> > + EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> > +
> > batch[i++] = XY_SRC_COPY_BLT_CMD |
> > XY_SRC_COPY_BLT_WRITE_ALPHA |
> > XY_SRC_COPY_BLT_WRITE_RGB;
> > @@ -77,49 +97,52 @@ copy(int fd, uint32_t dst, uint32_t src)
> > WIDTH*4;
> > batch[i++] = 0; /* dst x1,y1 */
> > batch[i++] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
> > - batch[i++] = 0; /* dst reloc */
> > + batch[i++] = obj[0].offset;
> > if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> > - batch[i++] = 0;
> > + batch[i++] = obj[0].offset >> 32;
> > batch[i++] = 0; /* src x1,y1 */
> > batch[i++] = WIDTH*4;
> > - batch[i++] = 0; /* src reloc */
> > + batch[i++] = obj[1].offset;
> > if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> > - batch[i++] = 0;
> > + batch[i++] = obj[1].offset >> 32;
> > batch[i++] = MI_BATCH_BUFFER_END;
> > batch[i++] = MI_NOOP;
> >
> > - memset(reloc, 0, sizeof(reloc));
> > - reloc[0].target_handle = dst;
> > - reloc[0].delta = 0;
> > - reloc[0].offset = 4 * sizeof(batch[0]);
> > - reloc[0].presumed_offset = 0;
> > - reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> > - reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> > -
> > - reloc[1].target_handle = src;
> > - reloc[1].delta = 0;
> > - reloc[1].offset = 7 * sizeof(batch[0]);
> > - if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> > - reloc[1].offset += sizeof(batch[0]);
> > - reloc[1].presumed_offset = 0;
> > - reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> > - reloc[1].write_domain = 0;
> > -
> > - memset(obj, 0, sizeof(obj));
> > - obj[0].handle = dst;
> > - obj[1].handle = src;
> > - obj[2].handle = gem_create(fd, 4096);
> > gem_write(fd, obj[2].handle, 0, batch, i * sizeof(batch[0]));
> > - obj[2].relocation_count = 2;
> > - obj[2].relocs_ptr = to_user_pointer(reloc);
> > +
> > + if (do_relocs) {
> > + memset(reloc, 0, sizeof(reloc));
> > + reloc[0].target_handle = dst;
> > + reloc[0].delta = 0;
> > + reloc[0].offset = 4 * sizeof(batch[0]);
> > + reloc[0].presumed_offset = obj[0].offset;
> > + reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> > + reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> > +
> > + reloc[1].target_handle = src;
> > + reloc[1].delta = 0;
> > + reloc[1].offset = 7 * sizeof(batch[0]);
> > + if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> > + reloc[1].offset += sizeof(batch[0]);
> > + reloc[1].presumed_offset = obj[1].offset;
> > + reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> > + reloc[1].write_domain = 0;
> > +
> > + obj[0].flags &= ~EXEC_OBJECT_PINNED;
> > + obj[1].flags &= ~EXEC_OBJECT_PINNED;
> > + obj[2].flags &= ~EXEC_OBJECT_PINNED;
> > + obj[2].relocation_count = ARRAY_SIZE(reloc);
> > + obj[2].relocs_ptr = to_user_pointer(reloc);
> > + }
> >
> > memset(&exec, 0, sizeof(exec));
> > exec.buffers_ptr = to_user_pointer(obj);
> > - exec.buffer_count = 3;
> > + exec.buffer_count = ARRAY_SIZE(obj);
> > exec.batch_len = i * sizeof(batch[0]);
> > exec.flags = gem_has_blt(fd) ? I915_EXEC_BLT : 0;
> > -
> > gem_execbuf(fd, &exec);
> > +
> > + intel_allocator_free(ahnd, obj[2].handle);
> > gem_close(fd, obj[2].handle);
> > }
>
>
> @@ -64,6 +67,20 @@ copy(int fd, uint32_t dst, uint32_t src)
> struct drm_i915_gem_execbuffer2 exec;
> int i = 0;
>
> + memset(obj, 0, sizeof(obj));
> + obj[0].handle = dst;
> + obj[0].offset = CANONICAL(dst_offset);
> + obj[0].flags = EXEC_OBJECT_WRITE | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> + obj[1].handle = src;
> + obj[1].offset = CANONICAL(src_offset);
> + obj[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> +
> + obj[2].handle = gem_create(fd, 4096);
> + obj[2].offset = intel_allocator_alloc(ahnd, obj[2].handle,
> + 4096, ALIGNMENT);
> + obj[2].offset = CANONICAL(obj[2].offset);
> + obj[2].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> +
> batch[i++] = XY_SRC_COPY_BLT_CMD |
> XY_SRC_COPY_BLT_WRITE_ALPHA |
> XY_SRC_COPY_BLT_WRITE_RGB;
> @@ -77,14 +94,14 @@ copy(int fd, uint32_t dst, uint32_t src)
> WIDTH*4;
> batch[i++] = 0; /* dst x1,y1 */
> batch[i++] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
> - batch[i++] = 0; /* dst reloc */
> + batch[i++] = obj[0].offset;
> if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> - batch[i++] = 0;
> + batch[i++] = obj[0].offset >> 32;
> batch[i++] = 0; /* src x1,y1 */
> batch[i++] = WIDTH*4;
> - batch[i++] = 0; /* src reloc */
> + batch[i++] = obj[1].offset;
> if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> - batch[i++] = 0;
> + batch[i++] = obj[1].offset >> 32;
> batch[i++] = MI_BATCH_BUFFER_END;
> batch[i++] = MI_NOOP;
>
> @@ -92,7 +109,7 @@ copy(int fd, uint32_t dst, uint32_t src)
> reloc[0].target_handle = dst;
> reloc[0].delta = 0;
> reloc[0].offset = 4 * sizeof(batch[0]);
> - reloc[0].presumed_offset = 0;
> + reloc[0].presumed_offset = obj[0].offset;
> reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
>
> @@ -101,25 +118,29 @@ copy(int fd, uint32_t dst, uint32_t src)
> reloc[1].offset = 7 * sizeof(batch[0]);
> if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> reloc[1].offset += sizeof(batch[0]);
> - reloc[1].presumed_offset = 0;
> + reloc[1].presumed_offset = obj[1].offset;
> reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> reloc[1].write_domain = 0;
>
> - memset(obj, 0, sizeof(obj));
> - obj[0].handle = dst;
> - obj[1].handle = src;
> - obj[2].handle = gem_create(fd, 4096);
> gem_write(fd, obj[2].handle, 0, batch, i * sizeof(batch[0]));
> - obj[2].relocation_count = 2;
> - obj[2].relocs_ptr = to_user_pointer(reloc);
> +
> + if (do_relocs) {
> + obj[2].relocation_count = ARRAY_SIZE(reloc);
> + obj[2].relocs_ptr = to_user_pointer(reloc);
> + } else {
> + obj[0].flags |= EXEC_OBJECT_PINNED;
> + obj[1].flags |= EXEC_OBJECT_PINNED;
> + obj[2].flags |= EXEC_OBJECT_PINNED;
> + }
>
> memset(&exec, 0, sizeof(exec));
> exec.buffers_ptr = to_user_pointer(obj);
> - exec.buffer_count = 3;
> + exec.buffer_count = ARRAY_SIZE(obj);
> exec.batch_len = i * sizeof(batch[0]);
> exec.flags = gem_has_blt(fd) ? I915_EXEC_BLT : 0;
> -
> gem_execbuf(fd, &exec);
> +
> + intel_allocator_free(ahnd, obj[2].handle);
> gem_close(fd, obj[2].handle);
> }
>
>
> Is more concise, both in delta and test flow.
Above makes sense, looks better. I'll send this in an hour.
--
Zbigniew
>
> It also suggests something like
>
> intel_allocator_apply_relocations(ahnd, &exec):
> foreach(exec.obj) {
> foreach(execobj.reloc) {
> if (reloc.write_domain)
> execobj.flags |= EXEC_OBJECT_WRITE;
>
> if (reloc.presumed_offset == reloc.target->offset)
> continue;
>
> reloc.presumed_offset = lookup_exec_offset(exec, reloc.target);
> u64 address = reloc.presumed_offset + (int)reloc.delta;
> memcpy(mmap(execobj) + reloc.offset, &address, ahnd->address_size);
> }
> if (!ahnd->use_relocs) {
> execobj.reloc = {};
> execobj.flags |= EXEC_OBJECT_PINNED;
> }
> }
> exec.flags |= I915_EXEC_NORELOC;
>
>
> would be really useful for converting tests.
I'm going to do this in the next series when I start rewriting tests.
The current series has too many iterations and I would like to add
all the convenience code later, once this series is merged.
But yes, you're right — this could save a lot of time during
rewriting.
--
Zbigniew
>
>
> > @@ -157,17 +180,28 @@ check_bo(int fd, uint32_t handle, uint32_t val)
> > igt_assert_eq(num_errors, 0);
> > }
> >
> > -static void run_test(int fd, int count)
> > +static void run_test(int fd, int count, bool do_relocs)
> > {
> > uint32_t *handle, *start_val;
> > + uint64_t *offset, ahnd;
> > uint32_t start = 0;
> > int i;
> >
> > + ahnd = intel_allocator_open(fd, 0, do_relocs ?
> > + INTEL_ALLOCATOR_RELOC :
> > + INTEL_ALLOCATOR_SIMPLE);
> > +
> > handle = malloc(sizeof(uint32_t) * count * 2);
> > + offset = calloc(1, sizeof(uint64_t) * count);
>
> calloc(count, sizeof(uint64_t)
>
> > + igt_assert_f(handle && offset, "Allocation failed\n");
> > start_val = handle + count;
> >
> > for (i = 0; i < count; i++) {
> > handle[i] = create_bo(fd, start);
> > +
> > + offset[i] = intel_allocator_alloc(ahnd, handle[i],
> > + sizeof(linear), ALIGNMENT);
> > +
> > start_val[i] = start;
> > start += 1024 * 1024 / 4;
> > }
> > @@ -178,17 +212,22 @@ static void run_test(int fd, int count)
> >
> > if (src == dst)
> > continue;
> > + copy(fd, ahnd, handle[dst], handle[src],
> > + offset[dst], offset[src], do_relocs);
> >
> > - copy(fd, handle[dst], handle[src]);
> > start_val[dst] = start_val[src];
> > }
> >
> > for (i = 0; i < count; i++) {
> > check_bo(fd, handle[i], start_val[i]);
> > + intel_allocator_free(ahnd, handle[i]);
> > gem_close(fd, handle[i]);
> > }
> >
> > free(handle);
> > + free(offset);
> > +
> > + intel_allocator_close(ahnd);
> > }
> >
> > #define MAX_32b ((1ull << 32) - 4096)
> > @@ -197,16 +236,21 @@ igt_main
> > {
> > const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> > uint64_t count = 0;
> > + bool do_relocs;
> > int fd = -1;
> >
> > igt_fixture {
> > fd = drm_open_driver(DRIVER_INTEL);
> > igt_require_gem(fd);
> > gem_require_blitter(fd);
> > + do_relocs = !gem_uses_ppgtt(fd);
> >
> > count = gem_aperture_size(fd);
> > if (count >> 32)
> > count = MAX_32b;
> > + else
> > + do_relocs = true;
>
> So this catches Braswell (has a 32b full-ppgtt) and only Braswell.
> Any reason why the simple allocator cannot handle less than 32b of
> address space?
>
> > +
> > count = 3 + count / (1024*1024);
> > igt_require(count > 1);
> > intel_require_memory(count, sizeof(linear), CHECK_RAM);
> > @@ -216,19 +260,23 @@ igt_main
> > }
> >
> > igt_subtest("basic")
> > - run_test(fd, 2);
> > + run_test(fd, 2, do_relocs);
> >
> > igt_subtest("normal") {
> > + intel_allocator_multiprocess_start();
> > igt_fork(child, ncpus)
> > - run_test(fd, count);
> > + run_test(fd, count, do_relocs);
> > igt_waitchildren();
> > + intel_allocator_multiprocess_stop();
> > }
> >
> > igt_subtest("interruptible") {
> > + intel_allocator_multiprocess_start();
> > igt_fork_signal_helper();
> > igt_fork(child, ncpus)
> > - run_test(fd, count);
> > + run_test(fd, count, do_relocs);
> > igt_waitchildren();
> > igt_stop_signal_helper();
> > + intel_allocator_multiprocess_stop();
> > }
> > }
> > --
> > 2.26.0
> >
> > _______________________________________________
> > igt-dev mailing list
> > igt-dev at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/igt-dev
More information about the igt-dev
mailing list