[Intel-gfx] [PATCH i-g-t] i915/gem_exec_parallel: Launch 1s worth of threads
Chris Wilson
chris at chris-wilson.co.uk
Wed Jan 20 10:38:36 UTC 2021
Let's not assume that the thread execution is instantaneous, but apply a
time limit as well as a maximum number of threads so that the test should
always run in bounded time.
Also limit each thread to submitting only two pieces of outstanding work,
to minimise over-saturation. We use two alternating batches as a generic
way of tracking their fences.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
tests/i915/gem_exec_parallel.c | 62 +++++++++++++++++++++++-----------
1 file changed, 42 insertions(+), 20 deletions(-)
diff --git a/tests/i915/gem_exec_parallel.c b/tests/i915/gem_exec_parallel.c
index d3dd06a65..4f0fbdd02 100644
--- a/tests/i915/gem_exec_parallel.c
+++ b/tests/i915/gem_exec_parallel.c
@@ -48,6 +48,7 @@ static inline uint32_t hash32(uint32_t val)
#define USERPTR 0x4
#define NUMOBJ 16
+#define MAXTHREADS 4096
struct thread {
pthread_t thread;
@@ -64,15 +65,15 @@ struct thread {
static void *thread(void *data)
{
struct thread *t = data;
- struct drm_i915_gem_exec_object2 obj[2];
- struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_exec_object2 obj[3];
+ struct drm_i915_gem_relocation_entry reloc[2];
struct drm_i915_gem_execbuffer2 execbuf;
uint32_t batch[16];
uint16_t used;
int fd, i;
pthread_mutex_lock(t->mutex);
- while (*t->go == 0)
+ while (READ_ONCE(*t->go) == 0)
pthread_cond_wait(t->cond, t->mutex);
pthread_mutex_unlock(t->mutex);
@@ -101,21 +102,28 @@ static void *thread(void *data)
memset(obj, 0, sizeof(obj));
obj[0].flags = EXEC_OBJECT_WRITE;
- memset(&reloc, 0, sizeof(reloc));
- reloc.offset = sizeof(uint32_t);
+ memset(reloc, 0, sizeof(reloc));
+ reloc[0].offset = sizeof(uint32_t);
if (t->gen < 8 && t->gen >= 4)
- reloc.offset += sizeof(uint32_t);
- reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
- reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
- reloc.delta = 4*t->id;
+ reloc[0].offset += sizeof(uint32_t);
+ reloc[0].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[0].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+ reloc[0].delta = 4*t->id;
+ reloc[1] = reloc[0];
+
obj[1].handle = gem_create(fd, 4096);
- obj[1].relocs_ptr = to_user_pointer(&reloc);
+ obj[1].relocs_ptr = to_user_pointer(&reloc[0]);
obj[1].relocation_count = 1;
gem_write(fd, obj[1].handle, 0, batch, sizeof(batch));
+ obj[2].handle = gem_create(fd, 4096);
+ obj[2].relocs_ptr = to_user_pointer(&reloc[1]);
+ obj[2].relocation_count = 1;
+ gem_write(fd, obj[2].handle, 0, batch, sizeof(batch));
+
memset(&execbuf, 0, sizeof(execbuf));
execbuf.buffers_ptr = to_user_pointer(obj);
- execbuf.buffer_count = 2;
+ execbuf.buffer_count = 2; /* NB !ARRAY_SIZE(obj), keep one in reserve */
execbuf.flags = t->engine;
execbuf.flags |= I915_EXEC_HANDLE_LUT;
execbuf.flags |= I915_EXEC_NO_RELOC;
@@ -129,6 +137,8 @@ static void *thread(void *data)
igt_until_timeout(1) {
unsigned int x = rand() % NUMOBJ;
+ igt_swap(obj[1], obj[2]);
+
used |= 1u << x;
obj[0].handle = t->scratch[x];
@@ -139,10 +149,13 @@ static void *thread(void *data)
if (t->flags & FDS)
gem_close(fd, obj[0].handle);
+
+ gem_sync(fd, obj[2].handle);
}
if (t->flags & CONTEXTS)
gem_context_destroy(fd, execbuf.rsvd1);
+ gem_close(fd, obj[2].handle);
gem_close(fd, obj[1].handle);
if (t->flags & FDS)
close(fd);
@@ -153,7 +166,7 @@ static void *thread(void *data)
static void check_bo(int fd, uint32_t handle, int pass, struct thread *threads)
{
- uint32_t x = hash32(handle * pass) % 1024;
+ uint32_t x = hash32(handle * pass) % MAXTHREADS;
uint32_t result;
if (!(threads[x].used & (1 << pass)))
@@ -167,18 +180,20 @@ static void check_bo(int fd, uint32_t handle, int pass, struct thread *threads)
static uint32_t handle_create(int fd, unsigned int flags, void **data)
{
+ unsigned int size = MAXTHREADS * sizeof(uint32_t);
+
if (flags & USERPTR) {
uint32_t handle;
void *ptr;
- posix_memalign(&ptr, 4096, 4096);
- gem_userptr(fd, ptr, 4096, 0, 0, &handle);
+ posix_memalign(&ptr, 4096, size);
+ gem_userptr(fd, ptr, size, 0, 0, &handle);
*data = ptr;
return handle;
}
- return gem_create(fd, 4096);
+ return gem_create(fd, size);
}
static void handle_close(int fd, unsigned int flags, uint32_t handle, void *data)
@@ -197,7 +212,9 @@ static void all(int fd, struct intel_execution_engine2 *engine, unsigned flags)
struct thread *threads;
pthread_mutex_t mutex;
pthread_cond_t cond;
+ struct timespec tv;
void *arg[NUMOBJ];
+ int count;
int go;
int i;
@@ -227,7 +244,7 @@ static void all(int fd, struct intel_execution_engine2 *engine, unsigned flags)
scratch[i] = gem_flink(fd, handle[i]);
}
- threads = calloc(1024, sizeof(struct thread));
+ threads = calloc(MAXTHREADS, sizeof(struct thread));
igt_assert(threads);
intel_detect_and_clear_missed_interrupts(fd);
@@ -235,7 +252,8 @@ static void all(int fd, struct intel_execution_engine2 *engine, unsigned flags)
pthread_cond_init(&cond, 0);
go = 0;
- for (i = 0; i < 1024; i++) {
+ memset(&tv, 0, sizeof(tv));
+ for (i = 0; i < MAXTHREADS && !igt_seconds_elapsed(&tv); i++) {
threads[i].id = i;
threads[i].fd = fd;
threads[i].gen = gen;
@@ -246,15 +264,19 @@ static void all(int fd, struct intel_execution_engine2 *engine, unsigned flags)
threads[i].cond = &cond;
threads[i].go = &go;
- pthread_create(&threads[i].thread, 0, thread, &threads[i]);
+ if (pthread_create(&threads[i].thread, 0, thread, &threads[i]))
+ break;
}
+ count = i;
+ igt_info("Launched %d threads\n", count);
+ igt_require(count);
pthread_mutex_lock(&mutex);
- go = 1024;
+ WRITE_ONCE(go, count);
pthread_cond_broadcast(&cond);
pthread_mutex_unlock(&mutex);
- for (i = 0; i < 1024; i++)
+ for (i = 0; i < count; i++)
pthread_join(threads[i].thread, NULL);
for (i = 0; i < NUMOBJ; i++) {
--
2.30.0
More information about the Intel-gfx
mailing list