[igt-dev] [PATCH i-g-t 1/2] tests/i915/gem_exec_gttfill: Support gens without relocations
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Wed May 12 08:14:18 UTC 2021
On Wed, May 12, 2021 at 07:40:09AM +0200, Andrzej Turko wrote:
> With relocations disabled for newer generations
> addresses of objects need to be assigned by the test.
> As all the objects won't fit in the gtt, using the allocator
> does not guarantee that submitted batches won't overlap.
> It only reduces the number of overlapping objects while ensuring
> that evictions happen at different offsets.
>
> Signed-off-by: Andrzej Turko <andrzej.turko at linux.intel.com>
> Cc: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> ---
> tests/i915/gem_exec_gttfill.c | 75 ++++++++++++++++++++++++++++++-----
> 1 file changed, 64 insertions(+), 11 deletions(-)
>
> diff --git a/tests/i915/gem_exec_gttfill.c b/tests/i915/gem_exec_gttfill.c
> index c0e27c9bb..091c74ebb 100644
> --- a/tests/i915/gem_exec_gttfill.c
> +++ b/tests/i915/gem_exec_gttfill.c
> @@ -28,6 +28,8 @@
> IGT_TEST_DESCRIPTION("Fill the GTT with batches.");
>
> #define BATCH_SIZE (4096<<10)
> +/* We don't have alignment detection yet, so assume the worst-case scenario. */
> +#define BATCH_ALIGNMENT (1 << 21)
>
> struct batch {
> uint32_t handle;
> @@ -47,15 +49,21 @@ static void xchg_batch(void *array, unsigned int i, unsigned int j)
> static void submit(int fd, int gen,
> struct drm_i915_gem_execbuffer2 *eb,
> struct drm_i915_gem_relocation_entry *reloc,
> - struct batch *batches, unsigned int count)
> + struct batch *batches, unsigned int count,
> + uint64_t ahnd, bool do_relocs)
Since ahnd == 0 is an invalid allocator handle, you can use that
instead of the additional do_relocs variable.
> {
> struct drm_i915_gem_exec_object2 obj;
> uint32_t batch[16];
> - unsigned n;
> + uint64_t address, value;
> + unsigned n, j;
>
> memset(&obj, 0, sizeof(obj));
> - obj.relocs_ptr = to_user_pointer(reloc);
> - obj.relocation_count = 2;
> + if (do_relocs) {
> + obj.relocs_ptr = to_user_pointer(reloc);
> + obj.relocation_count = 2;
> + } else {
> + obj.flags |= EXEC_OBJECT_PINNED;
> + }
>
> memset(reloc, 0, 2*sizeof(*reloc));
> reloc[0].offset = eb->batch_start_offset;
> @@ -85,16 +93,40 @@ static void submit(int fd, int gen,
> batch[++n] = 0; /* lower_32_bits(value) */
> batch[++n] = 0; /* upper_32_bits(value) / nop */
> batch[++n] = MI_BATCH_BUFFER_END;
> -
> eb->buffers_ptr = to_user_pointer(&obj);
> + j = 0;
> for (unsigned i = 0; i < count; i++) {
> obj.handle = batches[i].handle;
> reloc[0].target_handle = obj.handle;
> reloc[1].target_handle = obj.handle;
>
> - obj.offset = 0;
> - reloc[0].presumed_offset = obj.offset;
> - reloc[1].presumed_offset = obj.offset;
> + if (do_relocs) {
> + obj.offset = 0;
> + } else {
> + obj.offset = __intel_allocator_alloc(ahnd, obj.handle,
> + BATCH_SIZE,
> + BATCH_ALIGNMENT,
> + ALLOC_STRATEGY_HIGH_TO_LOW);
> + for (; obj.offset == -1; j = ((++j) == count ? 0 : j)) {
> + if (i != j)
> + intel_allocator_free(ahnd, batches[j].handle);
> + obj.offset = __intel_allocator_alloc(ahnd, obj.handle,
> + BATCH_SIZE,
> + BATCH_ALIGNMENT,
> + ALLOC_STRATEGY_HIGH_TO_LOW);
> + }
Ha, we're in userspace competing over a single set of offsets.
Why don't you just use:
j = (j + 1) % count;
It is more readable, and it avoids the unsequenced modification in
j = ((++j) == count ? 0 : j), which is undefined behavior (the
compiler would likely warn about it).
> +
> + /* If there is no relocation support, we assume gen >= 8. */
> + reloc[0].presumed_offset = obj.offset;
> + address = obj.offset + reloc[0].delta;
> + batch[1] = address;
> + batch[2] = address >> 32;
> +
> + reloc[1].presumed_offset = obj.offset;
> + value = obj.offset + reloc[1].delta;
> + batch[3] = value;
> + batch[4] = value >> 32;
> + }
>
> memcpy(batches[i].ptr + eb->batch_start_offset,
> batch, sizeof(batch));
> @@ -116,7 +148,8 @@ static void fillgtt(int fd, unsigned ring, int timeout)
> struct batch *batches;
> unsigned nengine;
> unsigned count;
> - uint64_t size;
> + uint64_t size, ahnd;
> + bool do_relocs = gem_has_relocations(fd);
>
> shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> igt_assert(shared != MAP_FAILED);
> @@ -138,6 +171,8 @@ static void fillgtt(int fd, unsigned ring, int timeout)
> igt_assert(nengine * 64 <= BATCH_SIZE);
>
> size = gem_aperture_size(fd);
> + if (!gem_uses_full_ppgtt(fd))
> + size /= 2;
> if (size > 1ull<<32) /* Limit to 4GiB as we do not use allow-48b */
> size = 1ull << 32;
> igt_require(size < (1ull<<32) * BATCH_SIZE);
> @@ -145,6 +180,12 @@ static void fillgtt(int fd, unsigned ring, int timeout)
> count = size / BATCH_SIZE + 1;
> igt_debug("Using %'d batches to fill %'llu aperture on %d engines\n",
> count, (long long)size, nengine);
> +
> + intel_allocator_multiprocess_start();
intel_allocator_multiprocess_start()/stop() should be called inside an
igt_fixture. Otherwise, if the test fails, we are left with a hanging
allocator thread. While this is likely not a problem for CI (igt_runner
invokes each test individually), it can cause unpredictable effects
when tests are run sequentially.
--
Zbigniew
> + /* Avoid allocating on the last page */
> + ahnd = intel_allocator_open_full(fd, 0, 0, size - 4096,
> + INTEL_ALLOCATOR_SIMPLE,
> + ALLOC_STRATEGY_HIGH_TO_LOW);
> intel_require_memory(count, BATCH_SIZE, CHECK_RAM);
> intel_detect_and_clear_missed_interrupts(fd);
>
> @@ -165,7 +206,7 @@ static void fillgtt(int fd, unsigned ring, int timeout)
> }
>
> /* Flush all memory before we start the timer */
> - submit(fd, gen, &execbuf, reloc, batches, count);
> + submit(fd, gen, &execbuf, reloc, batches, count, ahnd, do_relocs);
>
> igt_info("Setup %u batches in %.2fms\n",
> count, 1e-6 * igt_nsec_elapsed(&tv));
> @@ -176,8 +217,14 @@ static void fillgtt(int fd, unsigned ring, int timeout)
> igt_permute_array(batches, count, xchg_batch);
> execbuf.batch_start_offset = child*64;
> execbuf.flags |= engines[child];
> +
> + /* We need to open the allocator again in the new process */
> + ahnd = intel_allocator_open_full(fd, 0, 0, size - 4096,
> + INTEL_ALLOCATOR_SIMPLE,
> + ALLOC_STRATEGY_HIGH_TO_LOW);
> +
> igt_until_timeout(timeout) {
> - submit(fd, gen, &execbuf, reloc, batches, count);
> + submit(fd, gen, &execbuf, reloc, batches, count, ahnd, do_relocs);
> for (unsigned i = 0; i < count; i++) {
> uint64_t offset, delta;
>
> @@ -189,13 +236,18 @@ static void fillgtt(int fd, unsigned ring, int timeout)
> }
> shared[child] = cycles;
> igt_info("engine[%d]: %llu cycles\n", child, (long long)cycles);
> + intel_allocator_close(ahnd);
> }
> igt_waitchildren();
>
> + intel_allocator_close(ahnd);
> + intel_allocator_multiprocess_stop();
> +
> for (unsigned i = 0; i < count; i++) {
> munmap(batches[i].ptr, BATCH_SIZE);
> gem_close(fd, batches[i].handle);
> }
> + free(batches);
>
> shared[nengine] = 0;
> for (unsigned i = 0; i < nengine; i++)
> @@ -216,6 +268,7 @@ igt_main
> igt_fork_hang_detector(i915);
> }
>
> +
> igt_subtest("basic") /* just enough to run a single pass */
> fillgtt(i915, ALL_ENGINES, 1);
>
> --
> 2.25.1
>
More information about the igt-dev
mailing list