[PATCH i-g-t 37/47] WIP: gem_exec_schedule
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Wed Jun 30 08:39:53 UTC 2021
---
tests/i915/gem_exec_schedule.c | 202 +++++++++++++++++++++++++++------
1 file changed, 168 insertions(+), 34 deletions(-)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index fe3b8d29b..2eec34128 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -90,8 +90,9 @@ void __sync_read_u32_count(int fd, uint32_t handle, uint32_t *dst, uint64_t size
gem_read(fd, handle, 0, dst, size);
}
-static uint32_t __store_dword(int fd, uint32_t ctx, unsigned ring,
- uint32_t target, uint32_t offset, uint32_t value,
+static uint32_t __store_dword(int fd, uint64_t ahnd, uint32_t ctx, unsigned ring,
+ uint32_t target, uint64_t target_offset,
+ uint32_t offset, uint32_t value,
uint32_t cork, int fence, unsigned write_domain)
{
const unsigned int gen = intel_gen(intel_get_drm_devid(fd));
@@ -116,12 +117,26 @@ static uint32_t __store_dword(int fd, uint32_t ctx, unsigned ring,
memset(obj, 0, sizeof(obj));
obj[0].handle = cork;
- obj[0].offset = cork << 20;
obj[1].handle = target;
- obj[1].offset = target << 20;
obj[2].handle = gem_create(fd, 4096);
- obj[2].offset = 256 << 10;
- obj[2].offset += (random() % 128) << 12;
+ if (ahnd) {
+ /* If cork handle == 0 skip getting the offset */
+ if (obj[0].handle) {
+ obj[0].offset = get_offset(ahnd, obj[0].handle, 4096, 0);
+ obj[0].flags |= EXEC_OBJECT_PINNED;
+ }
+ obj[1].offset = target_offset;
+ obj[1].flags |= EXEC_OBJECT_PINNED;
+ if (write_domain)
+ obj[1].flags |= EXEC_OBJECT_WRITE;
+ obj[2].offset = get_offset(ahnd, obj[2].handle, 4096, 0);
+ obj[2].flags |= EXEC_OBJECT_PINNED;
+ } else {
+ obj[0].offset = cork << 20;
+ obj[1].offset = target << 20;
+ obj[2].offset = 256 << 10;
+ obj[2].offset += (random() % 128) << 12;
+ }
memset(&reloc, 0, sizeof(reloc));
reloc.target_handle = obj[1].handle;
@@ -131,13 +146,13 @@ static uint32_t __store_dword(int fd, uint32_t ctx, unsigned ring,
reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
reloc.write_domain = write_domain;
obj[2].relocs_ptr = to_user_pointer(&reloc);
- obj[2].relocation_count = 1;
+ obj[2].relocation_count = !ahnd ? 1 : 0;
i = 0;
batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
if (gen >= 8) {
batch[++i] = reloc.presumed_offset + reloc.delta;
- batch[++i] = 0;
+ batch[++i] = reloc.presumed_offset >> 32;
} else if (gen >= 4) {
batch[++i] = 0;
batch[++i] = reloc.presumed_offset + reloc.delta;
@@ -158,8 +173,17 @@ static void store_dword(int fd, uint32_t ctx, unsigned ring,
uint32_t target, uint32_t offset, uint32_t value,
unsigned write_domain)
{
- gem_close(fd, __store_dword(fd, ctx, ring,
- target, offset, value,
+ gem_close(fd, __store_dword(fd, 0, ctx, ring,
+ target, 123123, offset, value,
+ 0, -1, write_domain));
+}
+
+static void store_dword2(int fd, uint64_t ahnd, uint32_t ctx, unsigned ring,
+ uint32_t target, uint32_t offset, uint32_t value,
+ unsigned write_domain)
+{
+ gem_close(fd, __store_dword(fd, ahnd, ctx, ring,
+ target, 123123, offset, value,
0, -1, write_domain));
}
@@ -167,8 +191,17 @@ static void store_dword_plug(int fd, uint32_t ctx, unsigned ring,
uint32_t target, uint32_t offset, uint32_t value,
uint32_t cork, unsigned write_domain)
{
- gem_close(fd, __store_dword(fd, ctx, ring,
- target, offset, value,
+ gem_close(fd, __store_dword(fd, 0, ctx, ring,
+ target, 123123, offset, value,
+ cork, -1, write_domain));
+}
+
+static void store_dword_plug2(int fd, uint64_t ahnd, uint32_t ctx, unsigned ring,
+ uint32_t target, uint32_t offset, uint32_t value,
+ uint32_t cork, unsigned write_domain)
+{
+ gem_close(fd, __store_dword(fd, ahnd, ctx, ring,
+ target, 123123, offset, value,
cork, -1, write_domain));
}
@@ -176,11 +209,22 @@ static void store_dword_fenced(int fd, uint32_t ctx, unsigned ring,
uint32_t target, uint32_t offset, uint32_t value,
int fence, unsigned write_domain)
{
- gem_close(fd, __store_dword(fd, ctx, ring,
- target, offset, value,
+ gem_close(fd, __store_dword(fd, 0, ctx, ring,
+ target, 123123, offset, value,
+ 0, fence, write_domain));
+}
+
+static void store_dword_fenced2(int fd, uint64_t ahnd, uint32_t ctx, unsigned ring,
+ uint32_t target, uint64_t target_offset,
+ uint32_t offset, uint32_t value,
+ int fence, unsigned write_domain)
+{
+ gem_close(fd, __store_dword(fd, ahnd, ctx, ring,
+ target, target_offset, offset, value,
0, fence, write_domain));
}
+
static uint32_t create_highest_priority(int fd)
{
uint32_t ctx = gem_context_clone_with_engines(fd, 0);
@@ -195,7 +239,8 @@ static uint32_t create_highest_priority(int fd)
return ctx;
}
-static void unplug_show_queue(int fd, struct igt_cork *c, unsigned int engine)
+static void unplug_show_queue(int fd,
+ struct igt_cork *c, unsigned int engine)
{
igt_spin_t *spin[MAX_ELSP_QLEN];
int max = MAX_ELSP_QLEN;
@@ -221,26 +266,64 @@ static void unplug_show_queue(int fd, struct igt_cork *c, unsigned int engine)
}
+static void unplug_show_queue2(int fd, struct igt_cork *c, unsigned int engine)
+{
+ igt_spin_t *spin[MAX_ELSP_QLEN];
+ int max = MAX_ELSP_QLEN;
+
+ /* If no scheduler, all batches are emitted in submission order */
+ if (!gem_scheduler_enabled(fd))
+ max = 1;
+
+ for (int n = 0; n < max; n++) {
+ uint32_t ctx_id = create_highest_priority(fd);
+ uint64_t ahnd = get_reloc_ahnd(fd, ctx_id);
+ const struct igt_spin_factory opts = {
+ .ahnd = ahnd,
+ .ctx_id = ctx_id,
+ .engine = engine,
+ };
+ spin[n] = __igt_spin_factory(fd, &opts);
+ gem_context_destroy(fd, opts.ctx_id);
+ }
+
+ igt_cork_unplug(c); /* batches will now be queued on the engine */
+ igt_debugfs_dump(fd, "i915_engine_info");
+
+ for (int n = 0; n < max; n++) {
+ uint64_t ahnd = spin[n]->ahnd;
+ igt_spin_free(fd, spin[n]);
+ put_ahnd(ahnd);
+ }
+
+}
+
+
static void fifo(int fd, unsigned ring)
{
IGT_CORK_FENCE(cork);
uint32_t scratch;
uint32_t result;
int fence;
+ uint64_t ahnd = get_reloc_ahnd(fd, 0), scratch_offset;
scratch = gem_create(fd, 4096);
+ scratch_offset = get_offset(ahnd, scratch, 4096, 0);
fence = igt_cork_plug(&cork, fd);
/* Same priority, same timeline, final result will be the second eb */
- store_dword_fenced(fd, 0, ring, scratch, 0, 1, fence, 0);
- store_dword_fenced(fd, 0, ring, scratch, 0, 2, fence, 0);
+ store_dword_fenced2(fd, ahnd, 0, ring, scratch, scratch_offset,
+ 0, 1, fence, 0);
+ store_dword_fenced2(fd, ahnd, 0, ring, scratch, scratch_offset,
+ 0, 2, fence, 0);
- unplug_show_queue(fd, &cork, ring);
+ unplug_show_queue2(fd, &cork, ring);
close(fence);
result = __sync_read_u32(fd, scratch, 0);
gem_close(fd, scratch);
+ put_ahnd(ahnd);
igt_assert_eq_u32(result, 2);
}
@@ -258,6 +341,7 @@ static void implicit_rw(int i915, unsigned ring, enum implicit_dir dir)
uint32_t scratch;
uint32_t result;
int fence;
+ uint64_t ahnd = get_reloc_ahnd(i915, 0), scratch_offset;
count = 0;
__for_each_physical_engine(i915, e) {
@@ -272,11 +356,12 @@ static void implicit_rw(int i915, unsigned ring, enum implicit_dir dir)
igt_require(count);
scratch = gem_create(i915, 4096);
+ scratch_offset = get_offset(ahnd, scratch, 4096, 0);
fence = igt_cork_plug(&cork, i915);
if (dir & WRITE_READ)
- store_dword_fenced(i915, 0,
- ring, scratch, 0, ~ring,
+ store_dword_fenced2(i915, ahnd, 0,
+ ring, scratch, scratch_offset, 0, ~ring,
fence, I915_GEM_DOMAIN_RENDER);
__for_each_physical_engine(i915, e) {
@@ -286,21 +371,23 @@ static void implicit_rw(int i915, unsigned ring, enum implicit_dir dir)
if (!gem_class_can_store_dword(i915, e->class))
continue;
- store_dword_fenced(i915, 0,
- e->flags, scratch, 0, e->flags,
+ store_dword_fenced2(i915, ahnd, 0,
+ e->flags, scratch, scratch_offset,
+ 0, e->flags,
fence, 0);
}
if (dir & READ_WRITE)
- store_dword_fenced(i915, 0,
- ring, scratch, 0, ring,
+ store_dword_fenced2(i915, ahnd, 0,
+ ring, scratch, scratch_offset, 0, ring,
fence, I915_GEM_DOMAIN_RENDER);
- unplug_show_queue(i915, &cork, ring);
+ unplug_show_queue2(i915, &cork, ring);
close(fence);
result = __sync_read_u32(i915, scratch, 0);
gem_close(i915, scratch);
+ put_ahnd(ahnd);
if (dir & WRITE_READ)
igt_assert_neq_u32(result, ~ring);
@@ -316,8 +403,10 @@ static void independent(int fd, unsigned int engine, unsigned long flags)
uint32_t scratch, batch;
uint32_t *ptr;
int fence;
+ uint64_t ahnd = get_reloc_ahnd(fd, 0), scratch_offset;
scratch = gem_create(fd, 4096);
+ scratch_offset = get_offset(ahnd, scratch, 4096, 0);
ptr = gem_mmap__device_coherent(fd, scratch, 0, 4096, PROT_READ);
igt_assert_eq(ptr[0], 0);
@@ -333,6 +422,7 @@ static void independent(int fd, unsigned int engine, unsigned long flags)
if (spin == NULL) {
spin = __igt_spin_new(fd,
+ .ahnd = ahnd,
.engine = e->flags,
.flags = flags);
} else {
@@ -344,14 +434,17 @@ static void independent(int fd, unsigned int engine, unsigned long flags)
gem_execbuf(fd, &eb);
}
- store_dword_fenced(fd, 0, e->flags, scratch, 0, e->flags, fence, 0);
+ store_dword_fenced2(fd, ahnd, 0, e->flags,
+ scratch, scratch_offset,
+ 0, e->flags, fence, 0);
}
igt_require(spin);
/* Same priority, but different timeline (as different engine) */
- batch = __store_dword(fd, 0, engine, scratch, 0, engine, 0, fence, 0);
+ batch = __store_dword(fd, ahnd, 0, engine, scratch, scratch_offset,
+ 0, engine, 0, fence, 0);
- unplug_show_queue(fd, &cork, engine);
+ unplug_show_queue2(fd, &cork, engine);
close(fence);
gem_sync(fd, batch);
@@ -632,12 +725,15 @@ static void lateslice(int i915, unsigned int engine, unsigned long flags)
{
igt_spin_t *spin[3];
uint32_t ctx;
+ uint64_t ahnd[3];
igt_require(gem_scheduler_has_timeslicing(i915));
igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
ctx = gem_context_create(i915);
- spin[0] = igt_spin_new(i915, .ctx_id = ctx, .engine = engine,
+ ahnd[0] = get_reloc_ahnd(i915, ctx);
+ spin[0] = igt_spin_new(i915, .ahnd = ahnd[0], .ctx_id = ctx,
+ .engine = engine,
.flags = (IGT_SPIN_POLL_RUN |
IGT_SPIN_FENCE_OUT |
flags));
@@ -646,8 +742,9 @@ static void lateslice(int i915, unsigned int engine, unsigned long flags)
igt_spin_busywait_until_started(spin[0]);
ctx = gem_context_create(i915);
- spin[1] = igt_spin_new(i915, .ctx_id = ctx, .engine = engine,
- .fence = spin[0]->out_fence,
+ ahnd[1] = get_reloc_ahnd(i915, ctx);
+ spin[1] = igt_spin_new(i915, .ahnd = ahnd[1], .ctx_id = ctx,
+ .engine = engine, .fence = spin[0]->out_fence,
.flags = (IGT_SPIN_POLL_RUN |
IGT_SPIN_FENCE_IN |
flags));
@@ -663,7 +760,9 @@ static void lateslice(int i915, unsigned int engine, unsigned long flags)
*/
ctx = gem_context_create(i915);
- spin[2] = igt_spin_new(i915, .ctx_id = ctx, .engine = engine,
+ ahnd[2] = get_reloc_ahnd(i915, ctx);
+ spin[2] = igt_spin_new(i915, .ahnd = ahnd[2], .ctx_id = ctx,
+ .engine = engine,
.flags = IGT_SPIN_POLL_RUN | flags);
gem_context_destroy(i915, ctx);
@@ -684,6 +783,9 @@ static void lateslice(int i915, unsigned int engine, unsigned long flags)
igt_assert(gem_bo_busy(i915, spin[1]->handle));
igt_spin_free(i915, spin[1]);
+
+ for (int i = 0; i < ARRAY_SIZE(ahnd); i++)
+ put_ahnd(ahnd[i]);
}
static void cancel_spinner(int i915,
@@ -733,6 +835,8 @@ static void submit_slice(int i915,
.value = to_user_pointer(&engines),
.size = sizeof(engines),
};
+ uint64_t ahnd0 = get_reloc_ahnd(i915, 0);
+ uint64_t ahndX = get_reloc_ahnd(i915, param.ctx_id);
/*
* When using a submit fence, we do not want to block concurrent work,
@@ -752,7 +856,7 @@ static void submit_slice(int i915,
igt_debug("Testing cancellation from %s\n", e->name);
- bg = igt_spin_new(i915, .engine = e->flags);
+ bg = igt_spin_new(i915, .ahnd = ahnd0, .engine = e->flags);
if (flags & LATE_SUBMIT) {
timeline = sw_sync_timeline_create();
@@ -762,7 +866,8 @@ static void submit_slice(int i915,
engines.engines[0].engine_class = e->class;
engines.engines[0].engine_instance = e->instance;
	gem_context_set_param(i915, &param);
- spin = igt_spin_new(i915, .ctx_id = param.ctx_id,
+ spin = igt_spin_new(i915, .ahnd = ahndX,
+ .ctx_id = param.ctx_id,
.fence = fence,
.flags =
IGT_SPIN_POLL_RUN |
@@ -789,6 +894,8 @@ static void submit_slice(int i915,
}
gem_context_destroy(i915, param.ctx_id);
+ put_ahnd(ahnd0);
+ put_ahnd(ahndX);
}
static uint32_t __batch_create(int i915, uint32_t offset)
@@ -815,6 +922,7 @@ static void semaphore_userlock(int i915, unsigned long flags)
};
igt_spin_t *spin = NULL;
uint32_t scratch;
+ uint64_t ahnd = get_reloc_ahnd(i915, 0);
igt_require(gem_scheduler_has_timeslicing(i915));
@@ -829,6 +937,7 @@ static void semaphore_userlock(int i915, unsigned long flags)
__for_each_physical_engine(i915, e) {
if (!spin) {
spin = igt_spin_new(i915,
+ .ahnd = ahnd,
.dependency = scratch,
.engine = e->flags,
.flags = flags);
@@ -870,6 +979,7 @@ static void semaphore_userlock(int i915, unsigned long flags)
gem_close(i915, obj.handle);
igt_spin_free(i915, spin);
+ put_ahnd(ahnd);
}
static void semaphore_codependency(int i915, unsigned long flags)
@@ -878,6 +988,7 @@ static void semaphore_codependency(int i915, unsigned long flags)
struct {
igt_spin_t *xcs, *rcs;
} task[2];
+ uint64_t ahnd[GEM_MAX_ENGINES] = {};
int i;
/*
@@ -903,9 +1014,11 @@ static void semaphore_codependency(int i915, unsigned long flags)
continue;
ctx = gem_context_clone_with_engines(i915, 0);
+ ahnd[i] = get_simple_l2h_ahnd(i915, ctx);
task[i].xcs =
__igt_spin_new(i915,
+ .ahnd = ahnd[i],
.ctx_id = ctx,
.engine = e->flags,
.flags = IGT_SPIN_POLL_RUN | flags);
@@ -914,6 +1027,7 @@ static void semaphore_codependency(int i915, unsigned long flags)
/* Common rcs tasks will be queued in FIFO */
task[i].rcs =
__igt_spin_new(i915,
+ .ahnd = ahnd[i],
.ctx_id = ctx,
.engine = 0,
.dependency = task[i].xcs->handle);
@@ -939,6 +1053,11 @@ static void semaphore_codependency(int i915, unsigned long flags)
igt_spin_free(i915, task[i].xcs);
igt_spin_free(i915, task[i].rcs);
}
+
+ i = 0;
+ __for_each_physical_engine(i915, e) {
+ put_ahnd(ahnd[i++]);
+ }
}
static void semaphore_resolve(int i915, unsigned long flags)
@@ -2705,9 +2824,16 @@ static uint32_t read_ctx_timestamp(int i915,
#define RUNTIME (base + 0x3a8)
uint32_t *map, *cs;
uint32_t ts;
+ uint64_t ahnd = get_reloc_ahnd(i915, ctx);
igt_require(base);
+ if (ahnd) {
+ obj.offset = get_offset(ahnd, obj.handle, 4096, 0);
+ obj.flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+ obj.relocation_count = 0;
+ }
+
cs = map = gem_mmap__device_coherent(i915, obj.handle,
0, 4096, PROT_WRITE);
@@ -2722,6 +2848,7 @@ static uint32_t read_ctx_timestamp(int i915,
*cs++ = obj.offset >> 32;
*cs++ = MI_BATCH_BUFFER_END;
+ put_ahnd(ahnd);
gem_execbuf(i915, &execbuf);
gem_sync(i915, obj.handle);
@@ -2743,11 +2870,14 @@ static void fairslice(int i915,
double threshold;
uint32_t ctx[3];
uint32_t ts[3];
+ uint64_t ahnd;
for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
ctx[i] = gem_context_clone_with_engines(i915, 0);
if (spin == NULL) {
+ ahnd = get_reloc_ahnd(i915, ctx[i]);
spin = __igt_spin_new(i915,
+ .ahnd = ahnd,
.ctx_id = ctx[i],
.engine = e->flags,
.flags = flags);
@@ -2880,13 +3010,16 @@ igt_main
fairslice(fd, e, IGT_SPIN_USERPTR, 2);
igt_subtest("fairslice-all") {
+ intel_allocator_multiprocess_start();
__for_each_physical_engine(fd, e) {
igt_fork(child, 1)
fairslice(fd, e, 0, 2);
}
igt_waitchildren();
+ intel_allocator_multiprocess_stop();
}
igt_subtest("u-fairslice-all") {
+ intel_allocator_multiprocess_start();
__for_each_physical_engine(fd, e) {
igt_fork(child, 1)
fairslice(fd, e,
@@ -2894,6 +3027,7 @@ igt_main
2);
}
igt_waitchildren();
+ intel_allocator_multiprocess_stop();
}
}
--
2.26.0
More information about the Intel-gfx-trybot
mailing list