[PATCH i-g-t 38/52] WIP: gem_exec_schedule

Zbigniew Kempczyński zbigniew.kempczynski at intel.com
Thu Jul 1 12:37:18 UTC 2021


---
 tests/i915/gem_exec_schedule.c | 202 +++++++++++++++++++++++++++------
 1 file changed, 168 insertions(+), 34 deletions(-)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index fe3b8d29b..2eec34128 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -90,8 +90,9 @@ void __sync_read_u32_count(int fd, uint32_t handle, uint32_t *dst, uint64_t size
 	gem_read(fd, handle, 0, dst, size);
 }
 
-static uint32_t __store_dword(int fd, uint32_t ctx, unsigned ring,
-			      uint32_t target, uint32_t offset, uint32_t value,
+static uint32_t __store_dword(int fd, uint64_t ahnd, uint32_t ctx, unsigned ring,
+			      uint32_t target, uint64_t target_offset,
+			      uint32_t offset, uint32_t value,
 			      uint32_t cork, int fence, unsigned write_domain)
 {
 	const unsigned int gen = intel_gen(intel_get_drm_devid(fd));
@@ -116,12 +117,26 @@ static uint32_t __store_dword(int fd, uint32_t ctx, unsigned ring,
 
 	memset(obj, 0, sizeof(obj));
 	obj[0].handle = cork;
-	obj[0].offset = cork << 20;
 	obj[1].handle = target;
-	obj[1].offset = target << 20;
 	obj[2].handle = gem_create(fd, 4096);
-	obj[2].offset = 256 << 10;
-	obj[2].offset += (random() % 128) << 12;
+	if (ahnd) {
+		/* If the cork handle == 0, skip looking up its offset */
+		if (obj[0].handle) {
+			obj[0].offset = get_offset(ahnd, obj[0].handle, 4096, 0);
+			obj[0].flags |= EXEC_OBJECT_PINNED;
+		}
+		obj[1].offset = target_offset;
+		obj[1].flags |= EXEC_OBJECT_PINNED;
+		if (write_domain)
+			obj[1].flags |= EXEC_OBJECT_WRITE;
+		obj[2].offset = get_offset(ahnd, obj[2].handle, 4096, 0);
+		obj[2].flags |= EXEC_OBJECT_PINNED;
+	} else {
+		obj[0].offset = cork << 20;
+		obj[1].offset = target << 20;
+		obj[2].offset = 256 << 10;
+		obj[2].offset += (random() % 128) << 12;
+	}
 
 	memset(&reloc, 0, sizeof(reloc));
 	reloc.target_handle = obj[1].handle;
@@ -131,13 +146,13 @@ static uint32_t __store_dword(int fd, uint32_t ctx, unsigned ring,
 	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
 	reloc.write_domain = write_domain;
 	obj[2].relocs_ptr = to_user_pointer(&reloc);
-	obj[2].relocation_count = 1;
+	obj[2].relocation_count = !ahnd ? 1 : 0;
 
 	i = 0;
 	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		batch[++i] = reloc.presumed_offset + reloc.delta;
-		batch[++i] = 0;
+		batch[++i] = reloc.presumed_offset >> 32;
 	} else if (gen >= 4) {
 		batch[++i] = 0;
 		batch[++i] = reloc.presumed_offset + reloc.delta;
@@ -158,8 +173,17 @@ static void store_dword(int fd, uint32_t ctx, unsigned ring,
 			uint32_t target, uint32_t offset, uint32_t value,
 			unsigned write_domain)
 {
-	gem_close(fd, __store_dword(fd, ctx, ring,
-				    target, offset, value,
+	gem_close(fd, __store_dword(fd, 0, ctx, ring,
+				    target, 123123, offset, value,
+				    0, -1, write_domain));
+}
+
+static void store_dword2(int fd, uint64_t ahnd, uint32_t ctx, unsigned ring,
+			uint32_t target, uint32_t offset, uint32_t value,
+			unsigned write_domain)
+{
+	gem_close(fd, __store_dword(fd, ahnd, ctx, ring,
+				    target, 123123, offset, value,
 				    0, -1, write_domain));
 }
 
@@ -167,8 +191,17 @@ static void store_dword_plug(int fd, uint32_t ctx, unsigned ring,
 			     uint32_t target, uint32_t offset, uint32_t value,
 			     uint32_t cork, unsigned write_domain)
 {
-	gem_close(fd, __store_dword(fd, ctx, ring,
-				    target, offset, value,
+	gem_close(fd, __store_dword(fd, 0, ctx, ring,
+				    target, 123123, offset, value,
+				    cork, -1, write_domain));
+}
+
+static void store_dword_plug2(int fd, uint64_t ahnd, uint32_t ctx, unsigned ring,
+			     uint32_t target, uint32_t offset, uint32_t value,
+			     uint32_t cork, unsigned write_domain)
+{
+	gem_close(fd, __store_dword(fd, ahnd, ctx, ring,
+				    target, 123123, offset, value,
 				    cork, -1, write_domain));
 }
 
@@ -176,11 +209,22 @@ static void store_dword_fenced(int fd, uint32_t ctx, unsigned ring,
 			       uint32_t target, uint32_t offset, uint32_t value,
 			       int fence, unsigned write_domain)
 {
-	gem_close(fd, __store_dword(fd, ctx, ring,
-				    target, offset, value,
+	gem_close(fd, __store_dword(fd, 0, ctx, ring,
+				    target, 123123, offset, value,
+				    0, fence, write_domain));
+}
+
+static void store_dword_fenced2(int fd, uint64_t ahnd, uint32_t ctx, unsigned ring,
+			       uint32_t target, uint64_t target_offset,
+				uint32_t offset, uint32_t value,
+			       int fence, unsigned write_domain)
+{
+	gem_close(fd, __store_dword(fd, ahnd, ctx, ring,
+				    target, target_offset, offset, value,
 				    0, fence, write_domain));
 }
 
+
 static uint32_t create_highest_priority(int fd)
 {
 	uint32_t ctx = gem_context_clone_with_engines(fd, 0);
@@ -195,7 +239,8 @@ static uint32_t create_highest_priority(int fd)
 	return ctx;
 }
 
-static void unplug_show_queue(int fd, struct igt_cork *c, unsigned int engine)
+static void unplug_show_queue(int fd,
+			      struct igt_cork *c, unsigned int engine)
 {
 	igt_spin_t *spin[MAX_ELSP_QLEN];
 	int max = MAX_ELSP_QLEN;
@@ -221,26 +266,64 @@ static void unplug_show_queue(int fd, struct igt_cork *c, unsigned int engine)
 
 }
 
+static void unplug_show_queue2(int fd, struct igt_cork *c, unsigned int engine)
+{
+	igt_spin_t *spin[MAX_ELSP_QLEN];
+	int max = MAX_ELSP_QLEN;
+
+	/* If no scheduler, all batches are emitted in submission order */
+	if (!gem_scheduler_enabled(fd))
+		max = 1;
+
+	for (int n = 0; n < max; n++) {
+		uint32_t ctx_id = create_highest_priority(fd);
+		uint64_t ahnd = get_reloc_ahnd(fd, ctx_id);
+		const struct igt_spin_factory opts = {
+			.ahnd = ahnd,
+			.ctx_id = ctx_id,
+			.engine = engine,
+		};
+		spin[n] = __igt_spin_factory(fd, &opts);
+		gem_context_destroy(fd, opts.ctx_id);
+	}
+
+	igt_cork_unplug(c); /* batches will now be queued on the engine */
+	igt_debugfs_dump(fd, "i915_engine_info");
+
+	for (int n = 0; n < max; n++) {
+		uint64_t ahnd = spin[n]->ahnd;
+		igt_spin_free(fd, spin[n]);
+		put_ahnd(ahnd);
+	}
+
+}
+
+
 static void fifo(int fd, unsigned ring)
 {
 	IGT_CORK_FENCE(cork);
 	uint32_t scratch;
 	uint32_t result;
 	int fence;
+	uint64_t ahnd = get_reloc_ahnd(fd, 0), scratch_offset;
 
 	scratch = gem_create(fd, 4096);
+	scratch_offset = get_offset(ahnd, scratch, 4096, 0);
 
 	fence = igt_cork_plug(&cork, fd);
 
 	/* Same priority, same timeline, final result will be the second eb */
-	store_dword_fenced(fd, 0, ring, scratch, 0, 1, fence, 0);
-	store_dword_fenced(fd, 0, ring, scratch, 0, 2, fence, 0);
+	store_dword_fenced2(fd, ahnd, 0, ring, scratch, scratch_offset,
+			    0, 1, fence, 0);
+	store_dword_fenced2(fd, ahnd, 0, ring, scratch, scratch_offset,
+			    0, 2, fence, 0);
 
-	unplug_show_queue(fd, &cork, ring);
+	unplug_show_queue2(fd, &cork, ring);
 	close(fence);
 
 	result =  __sync_read_u32(fd, scratch, 0);
 	gem_close(fd, scratch);
+	put_ahnd(ahnd);
 
 	igt_assert_eq_u32(result, 2);
 }
@@ -258,6 +341,7 @@ static void implicit_rw(int i915, unsigned ring, enum implicit_dir dir)
 	uint32_t scratch;
 	uint32_t result;
 	int fence;
+	uint64_t ahnd = get_reloc_ahnd(i915, 0), scratch_offset;
 
 	count = 0;
 	__for_each_physical_engine(i915, e) {
@@ -272,11 +356,12 @@ static void implicit_rw(int i915, unsigned ring, enum implicit_dir dir)
 	igt_require(count);
 
 	scratch = gem_create(i915, 4096);
+	scratch_offset = get_offset(ahnd, scratch, 4096, 0);
 	fence = igt_cork_plug(&cork, i915);
 
 	if (dir & WRITE_READ)
-		store_dword_fenced(i915, 0,
-				   ring, scratch, 0, ~ring,
+		store_dword_fenced2(i915, ahnd, 0,
+				   ring, scratch, scratch_offset, 0, ~ring,
 				   fence, I915_GEM_DOMAIN_RENDER);
 
 	__for_each_physical_engine(i915, e) {
@@ -286,21 +371,23 @@ static void implicit_rw(int i915, unsigned ring, enum implicit_dir dir)
 		if (!gem_class_can_store_dword(i915, e->class))
 			continue;
 
-		store_dword_fenced(i915, 0,
-				   e->flags, scratch, 0, e->flags,
+		store_dword_fenced2(i915, ahnd, 0,
+				   e->flags, scratch, scratch_offset,
+				   0, e->flags,
 				   fence, 0);
 	}
 
 	if (dir & READ_WRITE)
-		store_dword_fenced(i915, 0,
-				   ring, scratch, 0, ring,
+		store_dword_fenced2(i915, ahnd, 0,
+				   ring, scratch, scratch_offset, 0, ring,
 				   fence, I915_GEM_DOMAIN_RENDER);
 
-	unplug_show_queue(i915, &cork, ring);
+	unplug_show_queue2(i915, &cork, ring);
 	close(fence);
 
 	result =  __sync_read_u32(i915, scratch, 0);
 	gem_close(i915, scratch);
+	put_ahnd(ahnd);
 
 	if (dir & WRITE_READ)
 		igt_assert_neq_u32(result, ~ring);
@@ -316,8 +403,10 @@ static void independent(int fd, unsigned int engine, unsigned long flags)
 	uint32_t scratch, batch;
 	uint32_t *ptr;
 	int fence;
+	uint64_t ahnd = get_reloc_ahnd(fd, 0), scratch_offset;
 
 	scratch = gem_create(fd, 4096);
+	scratch_offset = get_offset(ahnd, scratch, 4096, 0);
 	ptr = gem_mmap__device_coherent(fd, scratch, 0, 4096, PROT_READ);
 	igt_assert_eq(ptr[0], 0);
 
@@ -333,6 +422,7 @@ static void independent(int fd, unsigned int engine, unsigned long flags)
 
 		if (spin == NULL) {
 			spin = __igt_spin_new(fd,
+					      .ahnd = ahnd,
 					      .engine = e->flags,
 					      .flags = flags);
 		} else {
@@ -344,14 +434,17 @@ static void independent(int fd, unsigned int engine, unsigned long flags)
 			gem_execbuf(fd, &eb);
 		}
 
-		store_dword_fenced(fd, 0, e->flags, scratch, 0, e->flags, fence, 0);
+		store_dword_fenced2(fd, ahnd, 0, e->flags,
+				    scratch, scratch_offset,
+				    0, e->flags, fence, 0);
 	}
 	igt_require(spin);
 
 	/* Same priority, but different timeline (as different engine) */
-	batch = __store_dword(fd, 0, engine, scratch, 0, engine, 0, fence, 0);
+	batch = __store_dword(fd, ahnd, 0, engine, scratch, scratch_offset,
+			      0, engine, 0, fence, 0);
 
-	unplug_show_queue(fd, &cork, engine);
+	unplug_show_queue2(fd, &cork, engine);
 	close(fence);
 
 	gem_sync(fd, batch);
@@ -632,12 +725,15 @@ static void lateslice(int i915, unsigned int engine, unsigned long flags)
 {
 	igt_spin_t *spin[3];
 	uint32_t ctx;
+	uint64_t ahnd[3];
 
 	igt_require(gem_scheduler_has_timeslicing(i915));
 	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
 
 	ctx = gem_context_create(i915);
-	spin[0] = igt_spin_new(i915, .ctx_id = ctx, .engine = engine,
+	ahnd[0] = get_reloc_ahnd(i915, ctx);
+	spin[0] = igt_spin_new(i915, .ahnd = ahnd[0], .ctx_id = ctx,
+			       .engine = engine,
 			       .flags = (IGT_SPIN_POLL_RUN |
 					 IGT_SPIN_FENCE_OUT |
 					 flags));
@@ -646,8 +742,9 @@ static void lateslice(int i915, unsigned int engine, unsigned long flags)
 	igt_spin_busywait_until_started(spin[0]);
 
 	ctx = gem_context_create(i915);
-	spin[1] = igt_spin_new(i915, .ctx_id = ctx, .engine = engine,
-			       .fence = spin[0]->out_fence,
+	ahnd[1] = get_reloc_ahnd(i915, ctx);
+	spin[1] = igt_spin_new(i915, .ahnd = ahnd[1], .ctx_id = ctx,
+			       .engine = engine, .fence = spin[0]->out_fence,
 			       .flags = (IGT_SPIN_POLL_RUN |
 					 IGT_SPIN_FENCE_IN |
 					 flags));
@@ -663,7 +760,9 @@ static void lateslice(int i915, unsigned int engine, unsigned long flags)
 	 */
 
 	ctx = gem_context_create(i915);
-	spin[2] = igt_spin_new(i915, .ctx_id = ctx, .engine = engine,
+	ahnd[2] = get_reloc_ahnd(i915, ctx);
+	spin[2] = igt_spin_new(i915, .ahnd = ahnd[2], .ctx_id = ctx,
+			       .engine = engine,
 			       .flags = IGT_SPIN_POLL_RUN | flags);
 	gem_context_destroy(i915, ctx);
 
@@ -684,6 +783,9 @@ static void lateslice(int i915, unsigned int engine, unsigned long flags)
 
 	igt_assert(gem_bo_busy(i915, spin[1]->handle));
 	igt_spin_free(i915, spin[1]);
+
+	for (int i = 0; i < ARRAY_SIZE(ahnd); i++)
+		put_ahnd(ahnd[i]);
 }
 
 static void cancel_spinner(int i915,
@@ -733,6 +835,8 @@ static void submit_slice(int i915,
 		.value = to_user_pointer(&engines),
 		.size = sizeof(engines),
 	};
+	uint64_t ahnd0 = get_reloc_ahnd(i915, 0);
+	uint64_t ahndX = get_reloc_ahnd(i915, param.ctx_id);
 
 	/*
 	 * When using a submit fence, we do not want to block concurrent work,
@@ -752,7 +856,7 @@ static void submit_slice(int i915,
 
 		igt_debug("Testing cancellation from %s\n", e->name);
 
-		bg = igt_spin_new(i915, .engine = e->flags);
+		bg = igt_spin_new(i915, .ahnd = ahnd0, .engine = e->flags);
 
 		if (flags & LATE_SUBMIT) {
 			timeline = sw_sync_timeline_create();
@@ -762,7 +866,8 @@ static void submit_slice(int i915,
 		engines.engines[0].engine_class = e->class;
 		engines.engines[0].engine_instance = e->instance;
 		gem_context_set_param(i915, &param);
-		spin = igt_spin_new(i915, .ctx_id = param.ctx_id,
+		spin = igt_spin_new(i915, .ahnd = ahndX,
+				    .ctx_id = param.ctx_id,
 				    .fence = fence,
 				    .flags =
 				    IGT_SPIN_POLL_RUN |
@@ -789,6 +894,8 @@ static void submit_slice(int i915,
 	}
 
 	gem_context_destroy(i915, param.ctx_id);
+	put_ahnd(ahnd0);
+	put_ahnd(ahndX);
 }
 
 static uint32_t __batch_create(int i915, uint32_t offset)
@@ -815,6 +922,7 @@ static void semaphore_userlock(int i915, unsigned long flags)
 	};
 	igt_spin_t *spin = NULL;
 	uint32_t scratch;
+	uint64_t ahnd = get_reloc_ahnd(i915, 0);
 
 	igt_require(gem_scheduler_has_timeslicing(i915));
 
@@ -829,6 +937,7 @@ static void semaphore_userlock(int i915, unsigned long flags)
 	__for_each_physical_engine(i915, e) {
 		if (!spin) {
 			spin = igt_spin_new(i915,
+					    .ahnd = ahnd,
 					    .dependency = scratch,
 					    .engine = e->flags,
 					    .flags = flags);
@@ -870,6 +979,7 @@ static void semaphore_userlock(int i915, unsigned long flags)
 	gem_close(i915, obj.handle);
 
 	igt_spin_free(i915, spin);
+	put_ahnd(ahnd);
 }
 
 static void semaphore_codependency(int i915, unsigned long flags)
@@ -878,6 +988,7 @@ static void semaphore_codependency(int i915, unsigned long flags)
 	struct {
 		igt_spin_t *xcs, *rcs;
 	} task[2];
+	uint64_t ahnd[GEM_MAX_ENGINES] = {};
 	int i;
 
 	/*
@@ -903,9 +1014,11 @@ static void semaphore_codependency(int i915, unsigned long flags)
 			continue;
 
 		ctx = gem_context_clone_with_engines(i915, 0);
+		ahnd[i] = get_simple_l2h_ahnd(i915, ctx);
 
 		task[i].xcs =
 			__igt_spin_new(i915,
+				       .ahnd = ahnd[i],
 				       .ctx_id = ctx,
 				       .engine = e->flags,
 				       .flags = IGT_SPIN_POLL_RUN | flags);
@@ -914,6 +1027,7 @@ static void semaphore_codependency(int i915, unsigned long flags)
 		/* Common rcs tasks will be queued in FIFO */
 		task[i].rcs =
 			__igt_spin_new(i915,
+				       .ahnd = ahnd[i],
 				       .ctx_id = ctx,
 				       .engine = 0,
 				       .dependency = task[i].xcs->handle);
@@ -939,6 +1053,11 @@ static void semaphore_codependency(int i915, unsigned long flags)
 		igt_spin_free(i915, task[i].xcs);
 		igt_spin_free(i915, task[i].rcs);
 	}
+
+	i = 0;
+	__for_each_physical_engine(i915, e) {
+		put_ahnd(ahnd[i++]);
+	}
 }
 
 static void semaphore_resolve(int i915, unsigned long flags)
@@ -2705,9 +2824,16 @@ static uint32_t read_ctx_timestamp(int i915,
 #define RUNTIME (base + 0x3a8)
 	uint32_t *map, *cs;
 	uint32_t ts;
+	uint64_t ahnd = get_reloc_ahnd(i915, ctx);
 
 	igt_require(base);
 
+	if (ahnd) {
+		obj.offset = get_offset(ahnd, obj.handle, 4096, 0);
+		obj.flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+		obj.relocation_count = 0;
+	}
+
 	cs = map = gem_mmap__device_coherent(i915, obj.handle,
 					     0, 4096, PROT_WRITE);
 
@@ -2722,6 +2848,7 @@ static uint32_t read_ctx_timestamp(int i915,
 	*cs++ = obj.offset >> 32;
 
 	*cs++ = MI_BATCH_BUFFER_END;
+	put_ahnd(ahnd);
 
 	gem_execbuf(i915, &execbuf);
 	gem_sync(i915, obj.handle);
@@ -2743,11 +2870,14 @@ static void fairslice(int i915,
 	double threshold;
 	uint32_t ctx[3];
 	uint32_t ts[3];
+	uint64_t ahnd;
 
 	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
 		ctx[i] = gem_context_clone_with_engines(i915, 0);
 		if (spin == NULL) {
+			ahnd = get_reloc_ahnd(i915, ctx[i]);
 			spin = __igt_spin_new(i915,
+					      .ahnd = ahnd,
 					      .ctx_id = ctx[i],
 					      .engine = e->flags,
 					      .flags = flags);
@@ -2880,13 +3010,16 @@ igt_main
 				fairslice(fd, e, IGT_SPIN_USERPTR, 2);
 
 			igt_subtest("fairslice-all")  {
+				intel_allocator_multiprocess_start();
 				__for_each_physical_engine(fd, e) {
 					igt_fork(child, 1)
 						fairslice(fd, e, 0, 2);
 				}
 				igt_waitchildren();
+				intel_allocator_multiprocess_stop();
 			}
 			igt_subtest("u-fairslice-all")  {
+				intel_allocator_multiprocess_start();
 				__for_each_physical_engine(fd, e) {
 					igt_fork(child, 1)
 						fairslice(fd, e,
@@ -2894,6 +3027,7 @@ igt_main
 							  2);
 				}
 				igt_waitchildren();
+				intel_allocator_multiprocess_stop();
 			}
 		}
 
-- 
2.26.0



More information about the Intel-gfx-trybot mailing list