[PATCH i-g-t 1/1] tests/i915/gem_exec_gttfill: Support gens without relocations

Thu May 27 04:50:59 UTC 2021

With relocations disabled for newer generations, the addresses
of objects need to be assigned by the test itself.
As the objects cannot all fit in the GTT at once, using the allocator
does not guarantee that submitted batches won't overlap.
It only reduces the number of overlapping objects while ensuring
that evictions happen at different offsets.

Signed-off-by: Andrzej Turko <andrzej.turko at linux.intel.com>
Cc: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
---
 tests/i915/gem_exec_gttfill.c | 75 ++++++++++++++++++++++++++++++-----
 1 file changed, 64 insertions(+), 11 deletions(-)

diff --git a/tests/i915/gem_exec_gttfill.c b/tests/i915/gem_exec_gttfill.c
index c0e27c9bb..091c74ebb 100644
--- a/tests/i915/gem_exec_gttfill.c
+++ b/tests/i915/gem_exec_gttfill.c
@@ -28,6 +28,8 @@
 IGT_TEST_DESCRIPTION("Fill the GTT with batches.");
 
 #define BATCH_SIZE (4096<<10)
+/* We don't have alignment detection yet, so assume the worst-case scenario. */
+#define BATCH_ALIGNMENT (1 << 21)
 
 struct batch {
 	uint32_t handle;
@@ -47,15 +49,21 @@ static void xchg_batch(void *array, unsigned int i, unsigned int j)
 static void submit(int fd, int gen,
 		   struct drm_i915_gem_execbuffer2 *eb,
 		   struct drm_i915_gem_relocation_entry *reloc,
-		   struct batch *batches, unsigned int count)
+		   struct batch *batches, unsigned int count,
+		   uint64_t ahnd, bool do_relocs)
 {
 	struct drm_i915_gem_exec_object2 obj;
 	uint32_t batch[16];
-	unsigned n;
+	uint64_t address, value;
+	unsigned n, j;
 
 	memset(&obj, 0, sizeof(obj));
-	obj.relocs_ptr = to_user_pointer(reloc);
-	obj.relocation_count = 2;
+	if (do_relocs) {
+		obj.relocs_ptr = to_user_pointer(reloc);
+		obj.relocation_count = 2;
+	} else {
+		obj.flags |= EXEC_OBJECT_PINNED;
+	}
 
 	memset(reloc, 0, 2*sizeof(*reloc));
 	reloc[0].offset = eb->batch_start_offset;
@@ -85,16 +93,40 @@ static void submit(int fd, int gen,
 	batch[++n] = 0; /* lower_32_bits(value) */
 	batch[++n] = 0; /* upper_32_bits(value) / nop */
 	batch[++n] = MI_BATCH_BUFFER_END;
-
 	eb->buffers_ptr = to_user_pointer(&obj);
+	j = 0;
 	for (unsigned i = 0; i < count; i++) {
 		obj.handle = batches[i].handle;
 		reloc[0].target_handle = obj.handle;
 		reloc[1].target_handle = obj.handle;
 
-		obj.offset = 0;
-		reloc[0].presumed_offset = obj.offset;
-		reloc[1].presumed_offset = obj.offset;
+		if (do_relocs) {
+			obj.offset = 0;
+		} else {
+			obj.offset = __intel_allocator_alloc(ahnd, obj.handle,
+							     BATCH_SIZE,
+							     BATCH_ALIGNMENT,
+							     ALLOC_STRATEGY_HIGH_TO_LOW);
+			for (; obj.offset == -1; j = ((++j) == count ? 0 : j)) {
+				if (i != j)
+					intel_allocator_free(ahnd, batches[j].handle);
+				obj.offset = __intel_allocator_alloc(ahnd, obj.handle,
+								     BATCH_SIZE,
+								     BATCH_ALIGNMENT,
+								     ALLOC_STRATEGY_HIGH_TO_LOW);
+			}
+
+			/* If there is no relocation support, we assume gen >= 8. */
+			reloc[0].presumed_offset = obj.offset;
+			address = obj.offset + reloc[0].delta;
+			batch[1] = address;
+			batch[2] = address >> 32;
+
+			reloc[1].presumed_offset = obj.offset;
+			value = obj.offset + reloc[1].delta;
+			batch[3] = value;
+			batch[4] = value >> 32;
+		}
 
 		memcpy(batches[i].ptr + eb->batch_start_offset,
 		       batch, sizeof(batch));
@@ -116,7 +148,8 @@ static void fillgtt(int fd, unsigned ring, int timeout)
 	struct batch *batches;
 	unsigned nengine;
 	unsigned count;
-	uint64_t size;
+	uint64_t size, ahnd;
+	bool do_relocs = gem_has_relocations(fd);
 
 	shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
 	igt_assert(shared != MAP_FAILED);
@@ -138,6 +171,8 @@ static void fillgtt(int fd, unsigned ring, int timeout)
 	igt_assert(nengine * 64 <= BATCH_SIZE);
 
 	size = gem_aperture_size(fd);
+	if (!gem_uses_full_ppgtt(fd))
+		size /= 2;
 	if (size > 1ull<<32) /* Limit to 4GiB as we do not use allow-48b */
 		size = 1ull << 32;
 	igt_require(size < (1ull<<32) * BATCH_SIZE);
@@ -145,6 +180,12 @@ static void fillgtt(int fd, unsigned ring, int timeout)
 	count = size / BATCH_SIZE + 1;
 	igt_debug("Using %'d batches to fill %'llu aperture on %d engines\n",
 		  count, (long long)size, nengine);
+
+	intel_allocator_multiprocess_start();
+	/* Avoid allocating on the last page */
+	ahnd = intel_allocator_open_full(fd, 0, 0, size - 4096,
+					 INTEL_ALLOCATOR_SIMPLE,
+					 ALLOC_STRATEGY_HIGH_TO_LOW);
 	intel_require_memory(count, BATCH_SIZE, CHECK_RAM);
 	intel_detect_and_clear_missed_interrupts(fd);
 
@@ -165,7 +206,7 @@ static void fillgtt(int fd, unsigned ring, int timeout)
 	}
 
 	/* Flush all memory before we start the timer */
-	submit(fd, gen, &execbuf, reloc, batches, count);
+	submit(fd, gen, &execbuf, reloc, batches, count, ahnd, do_relocs);
 
 	igt_info("Setup %u batches in %.2fms\n",
 		 count, 1e-6 * igt_nsec_elapsed(&tv));
@@ -176,8 +217,14 @@ static void fillgtt(int fd, unsigned ring, int timeout)
 		igt_permute_array(batches, count, xchg_batch);
 		execbuf.batch_start_offset = child*64;
 		execbuf.flags |= engines[child];
+
+		/* We need to open the allocator again in the new process */
+		ahnd = intel_allocator_open_full(fd, 0, 0, size - 4096,
+						 INTEL_ALLOCATOR_SIMPLE,
+						 ALLOC_STRATEGY_HIGH_TO_LOW);
+
 		igt_until_timeout(timeout) {
-			submit(fd, gen, &execbuf, reloc, batches, count);
+			submit(fd, gen, &execbuf, reloc, batches, count, ahnd, do_relocs);
 			for (unsigned i = 0; i < count; i++) {
 				uint64_t offset, delta;
 
@@ -189,13 +236,18 @@ static void fillgtt(int fd, unsigned ring, int timeout)
 		}
 		shared[child] = cycles;
 		igt_info("engine[%d]: %llu cycles\n", child, (long long)cycles);
+		intel_allocator_close(ahnd);
 	}
 	igt_waitchildren();
 
+	intel_allocator_close(ahnd);
+	intel_allocator_multiprocess_stop();
+
 	for (unsigned i = 0; i < count; i++) {
 		munmap(batches[i].ptr, BATCH_SIZE);
 		gem_close(fd, batches[i].handle);
 	}
+	free(batches);
 
 	shared[nengine] = 0;
 	for (unsigned i = 0; i < nengine; i++)
@@ -216,6 +268,7 @@ igt_main
 		igt_fork_hang_detector(i915);
 	}
 
+
 	igt_subtest("basic") /* just enough to run a single pass */
 		fillgtt(i915, ALL_ENGINES, 1);
 
-- 
2.25.1