[Intel-gfx] [PATCH igt 04/10] igt/gem_exec_schedule: Exercise reordering with many priority levels
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Jul 31 14:35:45 UTC 2017
On 28/07/2017 13:08, Chris Wilson wrote:
> Apply a little more stress to the scheduler.
> ---
> lib/igt_rand.h | 6 +++
> tests/gem_exec_schedule.c | 108 +++++++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 113 insertions(+), 1 deletion(-)
>
> diff --git a/lib/igt_rand.h b/lib/igt_rand.h
> index f664af41..c9cb3243 100644
> --- a/lib/igt_rand.h
> +++ b/lib/igt_rand.h
> @@ -38,4 +38,10 @@ static inline void hars_petruska_f54_1_random_perturb(uint32_t xor)
> hars_petruska_f54_1_random_seed(hars_petruska_f54_1_random_unsafe());
> }
>
> +/* Returns: pseudo-random number in interval [0, ep_ro) */
> +static inline uint32_t hars_petruska_f54_1_random_unsafe_max(uint32_t ep_ro)
> +{
> + return ((uint64_t)hars_petruska_f54_1_random_unsafe() * ep_ro) >> 32;
> +}
> +
> #endif /* IGT_RAND_H */
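Neat trick -- the multiply-and-shift picks from [0, ep_ro) without the
integer division that a plain '% ep_ro' would cost. For reference, the
test below uses it as:

	/* uniform-ish index into the 1024-dword result buffer */
	int idx = hars_petruska_f54_1_random_unsafe_max(1024);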
> diff --git a/tests/gem_exec_schedule.c b/tests/gem_exec_schedule.c
> index 545dcc2e..1b6e29be 100644
> --- a/tests/gem_exec_schedule.c
> +++ b/tests/gem_exec_schedule.c
> @@ -25,6 +25,7 @@
>
> #include "igt.h"
> #include "igt_vgem.h"
> +#include "igt_rand.h"
>
> #define LOCAL_PARAM_HAS_SCHEDULER 41
> #define LOCAL_CONTEXT_PARAM_PRIORITY 6
> @@ -491,7 +492,7 @@ static void wide(int fd, unsigned ring)
> I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> for (int n = 0; n < NCTX; n++)
> igt_assert_eq_u32(ptr[n], ctx[n]);
> - munmap(ptr, 4096);
> + munmap(ptr, 4*NCTX);
If this patch gets stuck for a while, it would be worth splitting this fix out on its own.
>
> gem_close(fd, obj.handle);
> gem_close(fd, result);
> @@ -500,6 +501,108 @@ static void wide(int fd, unsigned ring)
> #undef XS
> }
>
> +static void reorder_wide(int fd, unsigned ring)
> +{
> + const int gen = intel_gen(intel_get_drm_devid(fd));
> + struct drm_i915_gem_relocation_entry reloc;
> + struct drm_i915_gem_exec_object2 obj[3];
> + struct drm_i915_gem_execbuffer2 execbuf;
> + struct cork cork;
> + uint32_t result, target;
> + uint32_t *busy;
> + uint32_t *r, *t;
> +
> + result = gem_create(fd, 4096);
> + target = gem_create(fd, 4096);
> +
> + busy = make_busy(fd, result, ring);
What does make_busy do? It submits eight magic batches which, I guess,
will not finish until finish_busy is called? But why eight of them? And...
> + plug(fd, &cork);
... why do we need it at all, given that we also control when the
batches below become runnable via the cork?
I think it is time to put some more comments into IGT to help other
people looking at the code: a high-level description of each subtest at
least, plus a few notes on the implementation approach.
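Something along these lines at the top of reorder_wide() would already
help (this is just my reading of the code below, so please correct it
if I got the intent backwards):

	/*
	 * reorder_wide: create one context per priority level and submit
	 * from each a stream of store-dword batches, all plugged behind
	 * the cork. Every batch writes its submission ordinal into a
	 * random dword of a shared result bo. Once unplugged, the
	 * scheduler should execute the contexts in priority order
	 * (highest first), so the final value of each dword comes from
	 * the lowest-priority context that touched it, i.e. the first
	 * writer recorded in t[].
	 */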
> +
> + t = gem_mmap__cpu(fd, target, 0, 4096, PROT_WRITE);
> + gem_set_domain(fd, target, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
> +
> + memset(obj, 0, sizeof(obj));
> + obj[0].handle = cork.handle;
> + obj[1].handle = result;
> + obj[2].relocs_ptr = to_user_pointer(&reloc);
> + obj[2].relocation_count = 1;
> +
> + memset(&reloc, 0, sizeof(reloc));
> + reloc.target_handle = result;
> + reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> + reloc.write_domain = 0; /* lies */
> +
> + memset(&execbuf, 0, sizeof(execbuf));
> + execbuf.buffers_ptr = to_user_pointer(obj);
> + execbuf.buffer_count = 3;
> + execbuf.flags = ring;
> + if (gen < 6)
> + execbuf.flags |= I915_EXEC_SECURE;
> +
> + for (int n = -MAX_PRIO, x = 1; n <= MAX_PRIO; n++, x++) {
> + uint32_t *batch;
> +
> + execbuf.rsvd1 = gem_context_create(fd);
> + ctx_set_priority(fd, execbuf.rsvd1, n);
> +
> + obj[2].handle = gem_create(fd, 128 * 64);
What is the significance of 128 and 64?
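From the loop below it looks like 128 batches of 64 bytes each, so a
pair of defines would make this self-documenting, e.g. (names are my
invention):

	#define N_BATCHES  128
	#define BATCH_SIZE 64 /* one store-dword + bb-end fits in 64 bytes */

	obj[2].handle = gem_create(fd, N_BATCHES * BATCH_SIZE);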
> + batch = gem_mmap__gtt(fd, obj[2].handle, 128 * 64, PROT_WRITE);
> + gem_set_domain(fd, obj[2].handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +
> + for (int m = 0; m < 128; m++) {
> + uint64_t addr;
> + int idx = hars_petruska_f54_1_random_unsafe_max( 1024);
Stray space.
> + int i;
> +
> + execbuf.batch_start_offset = m * 64;
> + reloc.offset = execbuf.batch_start_offset + sizeof(uint32_t);
> + reloc.delta = idx * sizeof(uint32_t);
> + addr = reloc.presumed_offset + reloc.delta;
> +
> + i = execbuf.batch_start_offset / sizeof(uint32_t);
> + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> + if (gen >= 8) {
> + batch[++i] = addr;
> + batch[++i] = addr >> 32;
> + } else if (gen >= 4) {
> + batch[++i] = 0;
> + batch[++i] = addr;
> + reloc.offset += sizeof(uint32_t);
> + } else {
> + batch[i]--;
> + batch[++i] = addr;
> + }
> + batch[++i] = x;
I wonder whether it is worth recording another todo: converting all the
store_dword_imm instruction building into a generic helper.
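A strawman for such a helper, just hoisting the sequence below
(untested, name invented):

	static int emit_store_dword_imm(uint32_t *cs, int i, int gen,
					uint64_t addr, uint32_t value,
					struct drm_i915_gem_relocation_entry *reloc)
	{
		cs[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
		if (gen >= 8) {
			cs[++i] = addr;
			cs[++i] = addr >> 32;
		} else if (gen >= 4) {
			cs[++i] = 0;
			cs[++i] = addr;
			if (reloc)
				reloc->offset += sizeof(uint32_t);
		} else {
			cs[i]--; /* older variant of the opcode */
			cs[++i] = addr;
		}
		cs[++i] = value;
		return ++i; /* index of the next free dword */
	}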
> + batch[++i] = MI_BATCH_BUFFER_END;
> +
> + if (!t[idx])
> + t[idx] = x;
The bo named target is only ever used from the CPU side? If I haven't
missed something, it could just be normally allocated memory in that case.
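I.e. something like (sketch):

	/* target never touches the GPU, so plain memory would do */
	t = calloc(1024, sizeof(*t));
	...
	free(t); /* instead of munmap(t, 4096) + gem_close(fd, target) */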
> +
> + gem_execbuf(fd, &execbuf);
> + }
> +
> + munmap(batch, 128 * 64);
> + gem_close(fd, obj[2].handle);
> + gem_context_destroy(fd, execbuf.rsvd1);
Does the ABI guarantee this field will be preserved?
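If not, stashing the id in a local would sidestep the question:

	uint32_t ctx = gem_context_create(fd);

	ctx_set_priority(fd, ctx, n);
	execbuf.rsvd1 = ctx;
	...
	gem_context_destroy(fd, ctx);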
> + }
> +
> + igt_assert(gem_bo_busy(fd, result));
> + unplug(&cork); /* only now submit our batches */
> + igt_debugfs_dump(fd, "i915_engine_info");
> + finish_busy(busy);
> +
> + r = gem_mmap__gtt(fd, result, 4096, PROT_READ);
> + gem_set_domain(fd, result, /* no write hazard lies! */
> + I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> + for (int n = 0; n < 1024; n++)
> + igt_assert_eq_u32(r[n], t[n]);
> + munmap(r, 4096);
> + munmap(t, 4096);
> +
> + gem_close(fd, result);
> + gem_close(fd, target);
> +}
> +
> static bool has_scheduler(int fd)
> {
> drm_i915_getparam_t gp;
> @@ -571,6 +674,9 @@ igt_main
>
> igt_subtest_f("wide-%s", e->name)
> wide(fd, e->exec_id | e->flags);
> +
> + igt_subtest_f("reorder-wide-%s", e->name)
> + reorder_wide(fd, e->exec_id | e->flags);
> }
> }
> }
>
Regards,
Tvrtko