[Intel-gfx] [PATCH igt 04/10] igt/gem_exec_schedule: Exercise reordering with many priority levels
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Jul 31 14:35:45 UTC 2017
On 28/07/2017 13:08, Chris Wilson wrote:
> Apply a little more stress to the scheduler.
> ---
> lib/igt_rand.h | 6 +++
> tests/gem_exec_schedule.c | 108 +++++++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 113 insertions(+), 1 deletion(-)
>
> diff --git a/lib/igt_rand.h b/lib/igt_rand.h
> index f664af41..c9cb3243 100644
> --- a/lib/igt_rand.h
> +++ b/lib/igt_rand.h
> @@ -38,4 +38,10 @@ static inline void hars_petruska_f54_1_random_perturb(uint32_t xor)
> hars_petruska_f54_1_random_seed(hars_petruska_f54_1_random_unsafe());
> }
>
> +/* Returns: pseudo-random number in interval [0, ep_ro) */
> +static inline uint32_t hars_petruska_f54_1_random_unsafe_max(uint32_t ep_ro)
> +{
> + return ((uint64_t)hars_petruska_f54_1_random_unsafe() * ep_ro) >> 32;
> +}
> +
> #endif /* IGT_RAND_H */
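Neat trick -- the multiply-and-shift picks from [0, ep_ro) without the
integer division that a plain '% ep_ro' would cost. For reference, the
test below uses it as:

	/* uniform-ish index into the 1024-dword result buffer */
	int idx = hars_petruska_f54_1_random_unsafe_max(1024);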
> diff --git a/tests/gem_exec_schedule.c b/tests/gem_exec_schedule.c
> index 545dcc2e..1b6e29be 100644
> --- a/tests/gem_exec_schedule.c
> +++ b/tests/gem_exec_schedule.c
> @@ -25,6 +25,7 @@
>
> #include "igt.h"
> #include "igt_vgem.h"
> +#include "igt_rand.h"
>
> #define LOCAL_PARAM_HAS_SCHEDULER 41
> #define LOCAL_CONTEXT_PARAM_PRIORITY 6
> @@ -491,7 +492,7 @@ static void wide(int fd, unsigned ring)
> I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> for (int n = 0; n < NCTX; n++)
> igt_assert_eq_u32(ptr[n], ctx[n]);
> - munmap(ptr, 4096);
> + munmap(ptr, 4*NCTX);
If this patch gets stuck for a while, it would be worth splitting this fix out on its own.
>
> gem_close(fd, obj.handle);
> gem_close(fd, result);
> @@ -500,6 +501,108 @@ static void wide(int fd, unsigned ring)
> #undef XS
> }
>
> +static void reorder_wide(int fd, unsigned ring)
> +{
> + const int gen = intel_gen(intel_get_drm_devid(fd));
> + struct drm_i915_gem_relocation_entry reloc;
> + struct drm_i915_gem_exec_object2 obj[3];
> + struct drm_i915_gem_execbuffer2 execbuf;
> + struct cork cork;
> + uint32_t result, target;
> + uint32_t *busy;
> + uint32_t *r, *t;
> +
> + result = gem_create(fd, 4096);
> + target = gem_create(fd, 4096);
> +
> + busy = make_busy(fd, result, ring);
What does make_busy do? It submits eight magic batches which, I guess,
will not finish until finish_busy is called? But why eight of them? And...
> + plug(fd, &cork);
... why do we need it at all, given that we also control when the
batches below become runnable via the cork?
I think it is time to put some more comments into IGT to help other
people looking at the code: a high-level description of each subtest at
least, plus a few notes on the implementation approach.
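Something along these lines at the top of reorder_wide() would already
help (this is just my reading of the code below, so please correct it
if I got the intent backwards):

	/*
	 * reorder_wide: create one context per priority level and submit
	 * from each a stream of store-dword batches, all plugged behind
	 * the cork. Every batch writes its submission ordinal into a
	 * random dword of a shared result bo. Once unplugged, the
	 * scheduler should execute the contexts in priority order
	 * (highest first), so the final value of each dword comes from
	 * the lowest-priority context that touched it, i.e. the first
	 * writer recorded in t[].
	 */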
> +
> + t = gem_mmap__cpu(fd, target, 0, 4096, PROT_WRITE);
> + gem_set_domain(fd, target, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
> +
> + memset(obj, 0, sizeof(obj));
> + obj[0].handle = cork.handle;
> + obj[1].handle = result;
> + obj[2].relocs_ptr = to_user_pointer(&reloc);
> + obj[2].relocation_count = 1;
> +
> + memset(&reloc, 0, sizeof(reloc));
> + reloc.target_handle = result;
> + reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> + reloc.write_domain = 0; /* lies */
> +
> + memset(&execbuf, 0, sizeof(execbuf));
> + execbuf.buffers_ptr = to_user_pointer(obj);
> + execbuf.buffer_count = 3;
> + execbuf.flags = ring;
> + if (gen < 6)
> + execbuf.flags |= I915_EXEC_SECURE;
> +
> + for (int n = -MAX_PRIO, x = 1; n <= MAX_PRIO; n++, x++) {
> + uint32_t *batch;
> +
> + execbuf.rsvd1 = gem_context_create(fd);
> + ctx_set_priority(fd, execbuf.rsvd1, n);
> +
> + obj[2].handle = gem_create(fd, 128 * 64);
What is the significance of 128 and 64?
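From the loop below it looks like 128 batches of 64 bytes each, so a
pair of defines would make this self-documenting, e.g. (names are my
invention):

	#define N_BATCHES  128
	#define BATCH_SIZE 64 /* one store-dword + bb-end fits in 64 bytes */

	obj[2].handle = gem_create(fd, N_BATCHES * BATCH_SIZE);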
> + batch = gem_mmap__gtt(fd, obj[2].handle, 128 * 64, PROT_WRITE);
> + gem_set_domain(fd, obj[2].handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +
> + for (int m = 0; m < 128; m++) {
> + uint64_t addr;
> + int idx = hars_petruska_f54_1_random_unsafe_max( 1024);
Stray space.
> + int i;
> +
> + execbuf.batch_start_offset = m * 64;
> + reloc.offset = execbuf.batch_start_offset + sizeof(uint32_t);
> + reloc.delta = idx * sizeof(uint32_t);
> + addr = reloc.presumed_offset + reloc.delta;
> +
> + i = execbuf.batch_start_offset / sizeof(uint32_t);
> + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> + if (gen >= 8) {
> + batch[++i] = addr;
> + batch[++i] = addr >> 32;
> + } else if (gen >= 4) {
> + batch[++i] = 0;
> + batch[++i] = addr;
> + reloc.offset += sizeof(uint32_t);
> + } else {
> + batch[i]--;
> + batch[++i] = addr;
> + }
> + batch[++i] = x;
I wonder whether it is worth recording another todo: converting all the
store_dword_imm instruction building into a generic helper.
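A strawman for such a helper, just hoisting the sequence below
(untested, name invented):

	static int emit_store_dword_imm(uint32_t *cs, int i, int gen,
					uint64_t addr, uint32_t value,
					struct drm_i915_gem_relocation_entry *reloc)
	{
		cs[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
		if (gen >= 8) {
			cs[++i] = addr;
			cs[++i] = addr >> 32;
		} else if (gen >= 4) {
			cs[++i] = 0;
			cs[++i] = addr;
			if (reloc)
				reloc->offset += sizeof(uint32_t);
		} else {
			cs[i]--; /* older variant of the opcode */
			cs[++i] = addr;
		}
		cs[++i] = value;
		return ++i; /* index of the next free dword */
	}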
> + batch[++i] = MI_BATCH_BUFFER_END;
> +
> + if (!t[idx])
> + t[idx] = x;
The bo named target is only ever used from the CPU side? If I haven't
missed something, it could just be normally allocated memory in that case.
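I.e. something like (sketch):

	/* target never touches the GPU, so plain memory would do */
	t = calloc(1024, sizeof(*t));
	...
	free(t); /* instead of munmap(t, 4096) + gem_close(fd, target) */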
> +
> + gem_execbuf(fd, &execbuf);
> + }
> +
> + munmap(batch, 128 * 64);
> + gem_close(fd, obj[2].handle);
> + gem_context_destroy(fd, execbuf.rsvd1);
Does the ABI guarantee this field will be preserved?
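If not, stashing the id in a local would sidestep the question:

	uint32_t ctx = gem_context_create(fd);

	ctx_set_priority(fd, ctx, n);
	execbuf.rsvd1 = ctx;
	...
	gem_context_destroy(fd, ctx);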
> + }
> +
> + igt_assert(gem_bo_busy(fd, result));
> + unplug(&cork); /* only now submit our batches */
> + igt_debugfs_dump(fd, "i915_engine_info");
> + finish_busy(busy);
> +
> + r = gem_mmap__gtt(fd, result, 4096, PROT_READ);
> + gem_set_domain(fd, result, /* no write hazard lies! */
> + I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> + for (int n = 0; n < 1024; n++)
> + igt_assert_eq_u32(r[n], t[n]);
> + munmap(r, 4096);
> + munmap(t, 4096);
> +
> + gem_close(fd, result);
> + gem_close(fd, target);
> +}
> +
> static bool has_scheduler(int fd)
> {
> drm_i915_getparam_t gp;
> @@ -571,6 +674,9 @@ igt_main
>
> igt_subtest_f("wide-%s", e->name)
> wide(fd, e->exec_id | e->flags);
> +
> + igt_subtest_f("reorder-wide-%s", e->name)
> + reorder_wide(fd, e->exec_id | e->flags);
> }
> }
> }
>
Regards,
Tvrtko