[PATCH i-g-t 01/20] lib/igt_dummyload: Add support of allocator use in igt spinner

Wed Apr 21 15:02:09 UTC 2021

For gens without relocations we need to use softpin with valid offsets
that do not overlap other execbuf objects. Because the spinner knows
nothing at creation time about the vm it will run in, an allocator
handle must be passed in so that offsets can be properly acquired from
the allocator instance.

Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
Cc: Petri Latvala <petri.latvala at intel.com>
---
 lib/igt_dummyload.c | 129 +++++++++++++++++++++++++++++---------------
 lib/igt_dummyload.h |   3 ++
 2 files changed, 90 insertions(+), 42 deletions(-)

diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
index 34ad92216..cb632c8cf 100644
--- a/lib/igt_dummyload.c
+++ b/lib/igt_dummyload.c
@@ -35,11 +35,13 @@
 #include "i915/gem_engine_topology.h"
 #include "i915/gem_mman.h"
 #include "i915/gem_submission.h"
+#include "igt_aux.h"
 #include "igt_core.h"
 #include "igt_device.h"
 #include "igt_dummyload.h"
 #include "igt_gt.h"
 #include "igt_vgem.h"
+#include "intel_allocator.h"
 #include "intel_chipset.h"
 #include "intel_reg.h"
 #include "ioctl_wrappers.h"
@@ -99,8 +101,8 @@ emit_recursive_batch(igt_spin_t *spin,
 	unsigned int flags[GEM_MAX_ENGINES];
 	unsigned int nengine;
 	int fence_fd = -1;
-	uint64_t addr;
-	uint32_t *cs;
+	uint64_t addr, addr_scratch, ahnd = opts->ahnd, objflags = 0;
+	uint32_t *cs, delta;
 	int i;
 
 	/*
@@ -117,11 +119,16 @@ emit_recursive_batch(igt_spin_t *spin,
 	 * are not allowed in the first 256KiB, for fear of negative relocations
 	 * that wrap.
 	 */
-	addr = gem_aperture_size(fd) / 2;
-	if (addr >> 31)
-		addr = 1 << 31;
-	addr += random() % addr / 2;
-	addr &= -4096;
+	if (!ahnd) {
+		addr = gem_aperture_size(fd) / 2;
+		if (addr >> 31)
+			addr = 1 << 31;
+		addr += random() % addr / 2;
+		addr &= -4096;
+	} else {
+		spin->ahnd = ahnd;
+		objflags |= EXEC_OBJECT_PINNED;
+	}
 
 	nengine = 0;
 	if (opts->engine == ALL_ENGINES) {
@@ -159,15 +166,27 @@ emit_recursive_batch(igt_spin_t *spin,
 	execbuf->buffer_count++;
 	cs = spin->batch;
 
-	obj[BATCH].offset = addr;
-	addr += BATCH_SIZE;
+	if (ahnd)
+		addr = intel_allocator_alloc_with_strategy(ahnd, obj[BATCH].handle,
+							   BATCH_SIZE, 0,
+							   ALLOC_STRATEGY_LOW_TO_HIGH);
+	obj[BATCH].offset = CANONICAL(addr);
+	obj[BATCH].flags |= objflags;
 
 	if (opts->dependency) {
 		igt_assert(!(opts->flags & IGT_SPIN_POLL_RUN));
+		if (ahnd)
+			addr_scratch = intel_allocator_alloc_with_strategy(ahnd, opts->dependency,
+									   BATCH_SIZE, 0,
+									   ALLOC_STRATEGY_LOW_TO_HIGH);
+		else
+			addr_scratch = addr + BATCH_SIZE;
 
 		obj[SCRATCH].handle = opts->dependency;
-		obj[SCRATCH].offset = addr;
-		if (!(opts->flags & IGT_SPIN_SOFTDEP)) {
+		obj[SCRATCH].offset = CANONICAL(addr_scratch);
+		obj[SCRATCH].flags |= objflags | EXEC_OBJECT_WRITE;
+
+		if (!(opts->flags & IGT_SPIN_SOFTDEP) && !ahnd) {
 			obj[SCRATCH].flags = EXEC_OBJECT_WRITE;
 
 			/* dummy write to dependency */
@@ -182,7 +201,8 @@ emit_recursive_batch(igt_spin_t *spin,
 
 		execbuf->buffer_count++;
 	} else if (opts->flags & IGT_SPIN_POLL_RUN) {
-		r = &relocs[obj[BATCH].relocation_count++];
+		if (!ahnd)
+			r = &relocs[obj[BATCH].relocation_count++];
 
 		igt_assert(!opts->dependency);
 
@@ -207,29 +227,38 @@ emit_recursive_batch(igt_spin_t *spin,
 								       0, 4096,
 								       PROT_READ | PROT_WRITE);
 		}
+
+		if (ahnd)
+			addr = intel_allocator_alloc_with_strategy(ahnd, opts->dependency,
+								   BATCH_SIZE * 3, 0,
+								   ALLOC_STRATEGY_LOW_TO_HIGH);
 		addr += 4096; /* guard page */
-		obj[SCRATCH].offset = addr;
+		obj[SCRATCH].offset = CANONICAL(addr);
 		addr += 4096;
 
 		igt_assert_eq(spin->poll[SPIN_POLL_START_IDX], 0);
 
-		r->presumed_offset = obj[SCRATCH].offset;
-		r->target_handle = obj[SCRATCH].handle;
-		r->offset = sizeof(uint32_t) * 1;
-		r->delta = sizeof(uint32_t) * SPIN_POLL_START_IDX;
+		delta = sizeof(uint32_t) * SPIN_POLL_START_IDX;
+		if (!ahnd) {
+			r->presumed_offset = obj[SCRATCH].offset;
+			r->target_handle = obj[SCRATCH].handle;
+			r->offset = sizeof(uint32_t) * 1;
+			r->delta = delta;
+		}
+		obj[SCRATCH].flags |= objflags;
 
 		*cs++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
 
 		if (gen >= 8) {
-			*cs++ = r->presumed_offset + r->delta;
-			*cs++ = 0;
+			*cs++ = obj[SCRATCH].offset + delta;
+			*cs++ = obj[SCRATCH].offset >> 32;
 		} else if (gen >= 4) {
 			*cs++ = 0;
-			*cs++ = r->presumed_offset + r->delta;
+			*cs++ = obj[SCRATCH].offset + delta;
 			r->offset += sizeof(uint32_t);
 		} else {
 			cs[-1]--;
-			*cs++ = r->presumed_offset + r->delta;
+			*cs++ = obj[SCRATCH].offset + delta;
 		}
 
 		*cs++ = 1;
@@ -273,7 +302,8 @@ emit_recursive_batch(igt_spin_t *spin,
 	 * no matter how they modify it (from either the GPU or CPU).
 	 */
 	if (gen >= 8) { /* arbitrary cutoff between ring/execlists submission */
-		r = &relocs[obj[BATCH].relocation_count++];
+		if (!ahnd)
+			r = &relocs[obj[BATCH].relocation_count++];
 
 		/*
 		 * On Sandybridge+ the comparison is a strict greater-than:
@@ -288,37 +318,46 @@ emit_recursive_batch(igt_spin_t *spin,
 		spin->condition[0] = 0xffffffff;
 		spin->condition[1] = 0xffffffff;
 
-		r->presumed_offset = obj[BATCH].offset;
-		r->target_handle = obj[BATCH].handle;
-		r->offset = (cs + 2 - spin->batch) * sizeof(*cs);
-		r->read_domains = I915_GEM_DOMAIN_COMMAND;
-		r->delta = (spin->condition - spin->batch) * sizeof(*cs);
+		delta = (spin->condition - spin->batch) * sizeof(*cs);
+		if (!ahnd) {
+			r->presumed_offset = obj[BATCH].offset;
+			r->target_handle = obj[BATCH].handle;
+			r->offset = (cs + 2 - spin->batch) * sizeof(*cs);
+			r->read_domains = I915_GEM_DOMAIN_COMMAND;
+			r->delta = delta;
+		}
 
 		*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
 		*cs++ = MI_BATCH_BUFFER_END;
-		*cs++ = r->presumed_offset + r->delta;
+		*cs++ = obj[BATCH].offset + delta;
 		*cs++ = 0;
 	}
 
 	/* recurse */
-	r = &relocs[obj[BATCH].relocation_count++];
-	r->target_handle = obj[BATCH].handle;
-	r->presumed_offset = obj[BATCH].offset;
-	r->offset = (cs + 1 - spin->batch) * sizeof(*cs);
-	r->read_domains = I915_GEM_DOMAIN_COMMAND;
-	r->delta = LOOP_START_OFFSET;
+	delta = LOOP_START_OFFSET;
+	if (!ahnd) {
+		r = &relocs[obj[BATCH].relocation_count++];
+		r->target_handle = obj[BATCH].handle;
+		r->presumed_offset = obj[BATCH].offset;
+		r->offset = (cs + 1 - spin->batch) * sizeof(*cs);
+		r->read_domains = I915_GEM_DOMAIN_COMMAND;
+		r->delta = delta;
+	}
+
 	if (gen >= 8) {
 		*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
-		*cs++ = r->presumed_offset + r->delta;
-		*cs++ = 0;
+		*cs++ = obj[BATCH].offset + delta;
+		*cs++ = obj[BATCH].offset >> 32;
 	} else if (gen >= 6) {
 		*cs++ = MI_BATCH_BUFFER_START | 1 << 8;
-		*cs++ = r->presumed_offset + r->delta;
+		*cs++ = obj[BATCH].offset + delta;
 	} else {
 		*cs++ = MI_BATCH_BUFFER_START | 2 << 6;
-		if (gen < 4)
-			r->delta |= 1;
-		*cs = r->presumed_offset + r->delta;
+		if (gen < 4) {
+			delta |= 1;
+			r->delta = delta;
+		}
+		*cs++ = obj[BATCH].offset + delta;
 		cs++;
 	}
 	obj[BATCH].relocs_ptr = to_user_pointer(relocs);
@@ -566,11 +605,17 @@ static void __igt_spin_free(int fd, igt_spin_t *spin)
 	if (spin->batch)
 		gem_munmap(spin->batch, BATCH_SIZE);
 
-	if (spin->poll_handle)
+	if (spin->poll_handle) {
 		gem_close(fd, spin->poll_handle);
+		if (spin->ahnd)
+			intel_allocator_free(spin->ahnd, spin->poll_handle);
+	}
 
-	if (spin->handle)
+	if (spin->handle) {
 		gem_close(fd, spin->handle);
+		if (spin->ahnd)
+			intel_allocator_free(spin->ahnd, spin->handle);
+	}
 
 	if (spin->out_fence >= 0)
 		close(spin->out_fence);
diff --git a/lib/igt_dummyload.h b/lib/igt_dummyload.h
index a75fcdeb8..ff8b0dd42 100644
--- a/lib/igt_dummyload.h
+++ b/lib/igt_dummyload.h
@@ -57,6 +57,8 @@ typedef struct igt_spin {
 
 	unsigned int flags;
 #define SPIN_CLFLUSH (1 << 0)
+
+	uint64_t ahnd;
 } igt_spin_t;
 
 struct igt_spin_factory {
@@ -65,6 +67,7 @@ struct igt_spin_factory {
 	unsigned int engine;
 	unsigned int flags;
 	int fence;
+	uint64_t ahnd;
 };
 
 #define IGT_SPIN_FENCE_IN      (1 << 0)
-- 
2.26.0