[igt-dev] [Intel-gfx] [PATCH i-g-t] igt/gem_exec_gttfill: Avoid pwrite into busy handle

Fri Jun 29 15:15:04 UTC 2018

On 28/06/2018 22:35, Chris Wilson wrote:
> The goal of gem_exec_gttfill is to exercise execbuf under heavy GTT
> pressure (by trying to execute more objects than may fit into the GTT).
> We spread the same set of handles across different processes, with the
> result that each would occasionally stall waiting for execution of an
> unrelated batch, limiting the pressure we were applying. If we use a
> streaming write via a WC pointer, we can avoid the serialisation penalty
> and so submit faster.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   tests/gem_exec_gttfill.c | 66 +++++++++++++++++++++++++---------------
>   1 file changed, 42 insertions(+), 24 deletions(-)
> 
> diff --git a/tests/gem_exec_gttfill.c b/tests/gem_exec_gttfill.c
> index 4097e4077..efd612bb6 100644
> --- a/tests/gem_exec_gttfill.c
> +++ b/tests/gem_exec_gttfill.c
> @@ -28,18 +28,25 @@ IGT_TEST_DESCRIPTION("Fill the GTT with batches.");
>   
>   #define BATCH_SIZE (4096<<10)
>   
> -static void xchg_u32(void *array, unsigned i, unsigned j)
> +struct batch {
> +	uint32_t handle;
> +	void *ptr;
> +};
> +
> +static void xchg_batch(void *array, unsigned int i, unsigned int j)
>   {
> -	uint32_t *u32 = array;
> -	uint32_t tmp = u32[i];
> -	u32[i] = u32[j];
> -	u32[j] = tmp;
> +	struct batch *batches = array;
> +	struct batch tmp;
> +
> +	tmp = batches[i];
> +	batches[i] = batches[j];
> +	batches[j] = tmp;
>   }
>   
>   static void submit(int fd, int gen,
>   		   struct drm_i915_gem_execbuffer2 *eb,
>   		   struct drm_i915_gem_relocation_entry *reloc,
> -		   uint32_t *handles, unsigned count)
> +		   struct batch *batches, unsigned int count)
>   {
>   	struct drm_i915_gem_exec_object2 obj;
>   	uint32_t batch[16];
> @@ -80,7 +87,7 @@ static void submit(int fd, int gen,
>   
>   	eb->buffers_ptr = to_user_pointer(&obj);
>   	for (unsigned i = 0; i < count; i++) {
> -		obj.handle = handles[i];
> +		obj.handle = batches[i].handle;
>   		reloc[0].target_handle = obj.handle;
>   		reloc[1].target_handle = obj.handle;
>   
> @@ -88,8 +95,8 @@ static void submit(int fd, int gen,
>   		reloc[0].presumed_offset = obj.offset;
>   		reloc[1].presumed_offset = obj.offset;
>   
> -		gem_write(fd, obj.handle, eb->batch_start_offset,
> -			  batch, sizeof(batch));
> +		memcpy(batches[i].ptr + eb->batch_start_offset,
> +		       batch, sizeof(batch));
>   
>   		gem_execbuf(fd, eb);
>   	}
> @@ -103,7 +110,7 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>   	struct drm_i915_gem_execbuffer2 execbuf;
>   	struct drm_i915_gem_relocation_entry reloc[2];
>   	volatile uint64_t *shared;
> -	unsigned *handles;
> +	struct batch *batches;
>   	unsigned engines[16];
>   	unsigned nengine;
>   	unsigned engine;
> @@ -145,29 +152,38 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>   	if (gen < 6)
>   		execbuf.flags |= I915_EXEC_SECURE;
>   
> -	handles = calloc(count, sizeof(handles));
> -	igt_assert(handles);
> -	for (unsigned i = 0; i < count; i++)
> -		handles[i] = gem_create(fd, BATCH_SIZE);
> +	batches = calloc(count, sizeof(*batches));
> +	igt_assert(batches);
> +	for (unsigned i = 0; i < count; i++) {
> +		batches[i].handle = gem_create(fd, BATCH_SIZE);
> +		batches[i].ptr =
> +			__gem_mmap__wc(fd, batches[i].handle,
> +				       0, BATCH_SIZE, PROT_WRITE);
> +		if (!batches[i].ptr) {
> +			batches[i].ptr =
> +				__gem_mmap__gtt(fd, batches[i].handle,
> +						BATCH_SIZE, PROT_WRITE);
> +		}
> +		igt_require(batches[i].ptr);

Should this be igt_assert() rather than igt_require()?

> +	}
>   
>   	/* Flush all memory before we start the timer */
> -	submit(fd, gen, &execbuf, reloc, handles, count);
> +	submit(fd, gen, &execbuf, reloc, batches, count);
>   
>   	igt_fork(child, nengine) {
>   		uint64_t cycles = 0;
>   		hars_petruska_f54_1_random_perturb(child);
> -		igt_permute_array(handles, count, xchg_u32);
> +		igt_permute_array(batches, count, xchg_batch);
>   		execbuf.batch_start_offset = child*64;
>   		execbuf.flags |= engines[child];
>   		igt_until_timeout(timeout) {
> -			submit(fd, gen, &execbuf, reloc, handles, count);
> +			submit(fd, gen, &execbuf, reloc, batches, count);
>   			for (unsigned i = 0; i < count; i++) {
> -				uint32_t handle = handles[i];
> -				uint64_t buf[2];
> +				uint64_t offset, delta;
>   
> -				gem_read(fd, handle, reloc[1].offset, &buf[0], sizeof(buf[0]));
> -				gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1]));
> -				igt_assert_eq_u64(buf[0], buf[1]);

Is no flushing or domain management needed here, especially since the
mapping can be either a WC or a GTT mmap?

> +				offset = *(uint64_t *)(batches[i].ptr + reloc[1].offset);
> +				delta = *(uint64_t *)(batches[i].ptr + reloc[0].delta);
> +				igt_assert_eq_u64(offset, delta);
>   			}
>   			cycles++;
>   		}
> @@ -176,8 +192,10 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>   	}
>   	igt_waitchildren();
>   
> -	for (unsigned i = 0; i < count; i++)
> -		gem_close(fd, handles[i]);
> +	for (unsigned i = 0; i < count; i++) {
> +		munmap(batches[i].ptr, BATCH_SIZE);
> +		gem_close(fd, batches[i].handle);
> +	}
>   
>   	shared[nengine] = 0;
>   	for (unsigned i = 0; i < nengine; i++)
> 
Regards,

Tvrtko