[PATCH i-g-t 10/15] NORELOC - gem_exec_gttfill

Tue Apr 20 18:47:13 UTC 2021

From: Andrzej Turko <andrzej.turko at intel.com>

---
 tests/i915/gem_exec_gttfill.c | 72 ++++++++++++++++++++++++++++++-----
 1 file changed, 63 insertions(+), 9 deletions(-)

diff --git a/tests/i915/gem_exec_gttfill.c b/tests/i915/gem_exec_gttfill.c
index c0e27c9bb..73f89180e 100644
--- a/tests/i915/gem_exec_gttfill.c
+++ b/tests/i915/gem_exec_gttfill.c
@@ -28,6 +28,7 @@
 IGT_TEST_DESCRIPTION("Fill the GTT with batches.");
 
 #define BATCH_SIZE (4096<<10)
+#define BATCH_ALIGNMENT (1 << 12)
 
 struct batch {
 	uint32_t handle;
@@ -47,15 +48,21 @@ static void xchg_batch(void *array, unsigned int i, unsigned int j)
 static void submit(int fd, int gen,
 		   struct drm_i915_gem_execbuffer2 *eb,
 		   struct drm_i915_gem_relocation_entry *reloc,
-		   struct batch *batches, unsigned int count)
+		   struct batch *batches, unsigned int count,
+		   uint64_t ahnd, bool do_relocs)
 {
 	struct drm_i915_gem_exec_object2 obj;
 	uint32_t batch[16];
-	unsigned n;
+	uint64_t address, value;
+	unsigned n, j;
 
 	memset(&obj, 0, sizeof(obj));
-	obj.relocs_ptr = to_user_pointer(reloc);
-	obj.relocation_count = 2;
+	if (do_relocs) {
+		obj.relocs_ptr = to_user_pointer(reloc);
+		obj.relocation_count = 2;
+	} else {
+		obj.flags |= EXEC_OBJECT_PINNED;
+	}
 
 	memset(reloc, 0, 2*sizeof(*reloc));
 	reloc[0].offset = eb->batch_start_offset;
@@ -85,16 +92,44 @@ static void submit(int fd, int gen,
 	batch[++n] = 0; /* lower_32_bits(value) */
 	batch[++n] = 0; /* upper_32_bits(value) / nop */
 	batch[++n] = MI_BATCH_BUFFER_END;
-
 	eb->buffers_ptr = to_user_pointer(&obj);
+	j = 0;
 	for (unsigned i = 0; i < count; i++) {
 		obj.handle = batches[i].handle;
 		reloc[0].target_handle = obj.handle;
 		reloc[1].target_handle = obj.handle;
 
-		obj.offset = 0;
+		obj.offset = __intel_allocator_alloc(ahnd, obj.handle,
+						     BATCH_SIZE,
+						     BATCH_ALIGNMENT,
+						     ALLOC_STRATEGY_NONE);
+		for (; obj.offset == -1; j = ((++j) == count ? 0 : j)) {
+			if (i != j)
+				intel_allocator_free(ahnd, batches[j].handle);
+			obj.offset = __intel_allocator_alloc(ahnd, obj.handle,
+							     BATCH_SIZE,
+							     BATCH_ALIGNMENT,
+							     ALLOC_STRATEGY_NONE);
+		}
+
 		reloc[0].presumed_offset = obj.offset;
 		reloc[1].presumed_offset = obj.offset;
+		address = obj.offset + reloc[0].delta;
+		value = obj.offset + reloc[1].delta;
+
+		if (gen >= 8) {
+			batch[1] = address;
+			batch[2] = address >> 32;
+
+			batch[3] = value;
+			batch[4] = value >> 32;
+		} else if (gen >=4) {
+			batch[2] = address;
+			batch[3] = value;
+		} else {
+			batch[1] = address;
+			batch[3] = value;
+		}
 
 		memcpy(batches[i].ptr + eb->batch_start_offset,
 		       batch, sizeof(batch));
@@ -116,7 +151,8 @@ static void fillgtt(int fd, unsigned ring, int timeout)
 	struct batch *batches;
 	unsigned nengine;
 	unsigned count;
-	uint64_t size;
+	uint64_t size, ahnd;
+	bool do_relocs = !gem_uses_ppgtt(fd);
 
 	shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
 	igt_assert(shared != MAP_FAILED);
@@ -145,6 +181,12 @@ static void fillgtt(int fd, unsigned ring, int timeout)
 	count = size / BATCH_SIZE + 1;
 	igt_debug("Using %'d batches to fill %'llu aperture on %d engines\n",
 		  count, (long long)size, nengine);
+
+	intel_allocator_multiprocess_start();
+	/* Avoid allocating on the last page */
+	ahnd = intel_allocator_open_full(fd, 0, 0, size - 4096,
+					 INTEL_ALLOCATOR_SIMPLE,
+					 ALLOC_STRATEGY_NONE);
 	intel_require_memory(count, BATCH_SIZE, CHECK_RAM);
 	intel_detect_and_clear_missed_interrupts(fd);
 
@@ -165,7 +207,7 @@ static void fillgtt(int fd, unsigned ring, int timeout)
 	}
 
 	/* Flush all memory before we start the timer */
-	submit(fd, gen, &execbuf, reloc, batches, count);
+	submit(fd, gen, &execbuf, reloc, batches, count, ahnd, do_relocs);
 
 	igt_info("Setup %u batches in %.2fms\n",
 		 count, 1e-6 * igt_nsec_elapsed(&tv));
@@ -176,8 +218,14 @@ static void fillgtt(int fd, unsigned ring, int timeout)
 		igt_permute_array(batches, count, xchg_batch);
 		execbuf.batch_start_offset = child*64;
 		execbuf.flags |= engines[child];
+
+		/* We need to open the allocator again in the new process */
+		ahnd = intel_allocator_open_full(fd, 0, 0, size - 4096,
+						 INTEL_ALLOCATOR_SIMPLE,
+						 ALLOC_STRATEGY_NONE);
+
 		igt_until_timeout(timeout) {
-			submit(fd, gen, &execbuf, reloc, batches, count);
+			submit(fd, gen, &execbuf, reloc, batches, count, ahnd, do_relocs);
 			for (unsigned i = 0; i < count; i++) {
 				uint64_t offset, delta;
 
@@ -189,13 +237,18 @@ static void fillgtt(int fd, unsigned ring, int timeout)
 		}
 		shared[child] = cycles;
 		igt_info("engine[%d]: %llu cycles\n", child, (long long)cycles);
+		intel_allocator_close(ahnd);
 	}
 	igt_waitchildren();
 
+	intel_allocator_close(ahnd);
+	intel_allocator_multiprocess_stop();
+
 	for (unsigned i = 0; i < count; i++) {
 		munmap(batches[i].ptr, BATCH_SIZE);
 		gem_close(fd, batches[i].handle);
 	}
+	free(batches);
 
 	shared[nengine] = 0;
 	for (unsigned i = 0; i < nengine; i++)
@@ -216,6 +269,7 @@ igt_main
 		igt_fork_hang_detector(i915);
 	}
 
+
 	igt_subtest("basic") /* just enough to run a single pass */
 		fillgtt(i915, ALL_ENGINES, 1);
 
-- 
2.26.0