[igt-dev] [PATCH i-g-t 1/2] tests/i915/gem_exec_gttfill: Support gens without relocations

Zbigniew Kempczyński zbigniew.kempczynski at intel.com
Wed May 12 09:28:55 UTC 2021


On Wed, May 12, 2021 at 07:40:09AM +0200, Andrzej Turko wrote:
> With relocations disabled for newer generations
> addresses of objects need to be assigned by the test.
> As all the objects won't fit in the gtt, using the allocator
> does not guarantee that submitted batches won't overlap.
> It only reduces the number of overlapping objects while ensuring
> that evictions happen at different offsets.
> 
> Signed-off-by: Andrzej Turko <andrzej.turko at linux.intel.com>
> Cc: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> ---
>  tests/i915/gem_exec_gttfill.c | 75 ++++++++++++++++++++++++++++++-----
>  1 file changed, 64 insertions(+), 11 deletions(-)
> 
> diff --git a/tests/i915/gem_exec_gttfill.c b/tests/i915/gem_exec_gttfill.c
> index c0e27c9bb..091c74ebb 100644
> --- a/tests/i915/gem_exec_gttfill.c
> +++ b/tests/i915/gem_exec_gttfill.c
> @@ -28,6 +28,8 @@
>  IGT_TEST_DESCRIPTION("Fill the GTT with batches.");
>  
>  #define BATCH_SIZE (4096<<10)
> +/* We don't have alignment detection yet, so assume the worst-case scenario. */
> +#define BATCH_ALIGNMENT (1 << 21)
>  
>  struct batch {
>  	uint32_t handle;
> @@ -47,15 +49,21 @@ static void xchg_batch(void *array, unsigned int i, unsigned int j)
>  static void submit(int fd, int gen,
>  		   struct drm_i915_gem_execbuffer2 *eb,
>  		   struct drm_i915_gem_relocation_entry *reloc,
> -		   struct batch *batches, unsigned int count)
> +		   struct batch *batches, unsigned int count,
> +		   uint64_t ahnd, bool do_relocs)
>  {
>  	struct drm_i915_gem_exec_object2 obj;
>  	uint32_t batch[16];
> -	unsigned n;
> +	uint64_t address, value;
> +	unsigned n, j;
>  
>  	memset(&obj, 0, sizeof(obj));
> -	obj.relocs_ptr = to_user_pointer(reloc);
> -	obj.relocation_count = 2;
> +	if (do_relocs) {
> +		obj.relocs_ptr = to_user_pointer(reloc);
> +		obj.relocation_count = 2;
> +	} else {
> +		obj.flags |= EXEC_OBJECT_PINNED;
> +	}
>  
>  	memset(reloc, 0, 2*sizeof(*reloc));
>  	reloc[0].offset = eb->batch_start_offset;
> @@ -85,16 +93,40 @@ static void submit(int fd, int gen,
>  	batch[++n] = 0; /* lower_32_bits(value) */
>  	batch[++n] = 0; /* upper_32_bits(value) / nop */
>  	batch[++n] = MI_BATCH_BUFFER_END;
> -
>  	eb->buffers_ptr = to_user_pointer(&obj);
> +	j = 0;
>  	for (unsigned i = 0; i < count; i++) {
>  		obj.handle = batches[i].handle;
>  		reloc[0].target_handle = obj.handle;
>  		reloc[1].target_handle = obj.handle;
>  
> -		obj.offset = 0;
> -		reloc[0].presumed_offset = obj.offset;
> -		reloc[1].presumed_offset = obj.offset;
> +		if (do_relocs) {
> +			obj.offset = 0;
> +		} else {
> +			obj.offset = __intel_allocator_alloc(ahnd, obj.handle,
> +							     BATCH_SIZE,
> +							     BATCH_ALIGNMENT,
> +							     ALLOC_STRATEGY_HIGH_TO_LOW);
> +			for (; obj.offset == -1; j = ((++j) == count ? 0 : j)) {
> +				if (i != j)
> +					intel_allocator_free(ahnd, batches[j].handle);
> +				obj.offset = __intel_allocator_alloc(ahnd, obj.handle,
> +								     BATCH_SIZE,
> +								     BATCH_ALIGNMENT,
> +								     ALLOC_STRATEGY_HIGH_TO_LOW);
> +			}
> +
> +			/* If there is no relocation support, we assume gen >= 8. */
> +			reloc[0].presumed_offset = obj.offset;
> +			address = obj.offset + reloc[0].delta;
> +			batch[1] = address;
> +			batch[2] = address >> 32;
> +
> +			reloc[1].presumed_offset = obj.offset;
> +			value = obj.offset + reloc[1].delta;
> +			batch[3] = value;
> +			batch[4] = value >> 32;
> +		}
>  
>  		memcpy(batches[i].ptr + eb->batch_start_offset,
>  		       batch, sizeof(batch));
> @@ -116,7 +148,8 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>  	struct batch *batches;
>  	unsigned nengine;
>  	unsigned count;
> -	uint64_t size;
> +	uint64_t size, ahnd;
> +	bool do_relocs = gem_has_relocations(fd);
>  
>  	shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
>  	igt_assert(shared != MAP_FAILED);
> @@ -138,6 +171,8 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>  	igt_assert(nengine * 64 <= BATCH_SIZE);
>  
>  	size = gem_aperture_size(fd);
> +	if (!gem_uses_full_ppgtt(fd))
> +		size /= 2;

Chris noticed you've limited the gtt size unnecessarily, while we actually
want to fill it completely and enforce eviction.

Since the idea is to evict a bb vma within the gtt, and we have full control
over the offsets, we can limit this to a few buffers and move it to BAT as
gem_softpin at bb-evict (skipping it on gens which use relocations).

So leave this test intact, adding an igt_require(gem_has_relocations()) check,
and add gem_softpin at bb-evict, which will perform eviction in a narrower vm range.

--
Zbigniew

>  	if (size > 1ull<<32) /* Limit to 4GiB as we do not use allow-48b */
>  		size = 1ull << 32;
>  	igt_require(size < (1ull<<32) * BATCH_SIZE);
> @@ -145,6 +180,12 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>  	count = size / BATCH_SIZE + 1;
>  	igt_debug("Using %'d batches to fill %'llu aperture on %d engines\n",
>  		  count, (long long)size, nengine);
> +
> +	intel_allocator_multiprocess_start();
> +	/* Avoid allocating on the last page */
> +	ahnd = intel_allocator_open_full(fd, 0, 0, size - 4096,
> +					 INTEL_ALLOCATOR_SIMPLE,
> +					 ALLOC_STRATEGY_HIGH_TO_LOW);
>  	intel_require_memory(count, BATCH_SIZE, CHECK_RAM);
>  	intel_detect_and_clear_missed_interrupts(fd);
>  
> @@ -165,7 +206,7 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>  	}
>  
>  	/* Flush all memory before we start the timer */
> -	submit(fd, gen, &execbuf, reloc, batches, count);
> +	submit(fd, gen, &execbuf, reloc, batches, count, ahnd, do_relocs);
>  
>  	igt_info("Setup %u batches in %.2fms\n",
>  		 count, 1e-6 * igt_nsec_elapsed(&tv));
> @@ -176,8 +217,14 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>  		igt_permute_array(batches, count, xchg_batch);
>  		execbuf.batch_start_offset = child*64;
>  		execbuf.flags |= engines[child];
> +
> +		/* We need to open the allocator again in the new process */
> +		ahnd = intel_allocator_open_full(fd, 0, 0, size - 4096,
> +						 INTEL_ALLOCATOR_SIMPLE,
> +						 ALLOC_STRATEGY_HIGH_TO_LOW);
> +
>  		igt_until_timeout(timeout) {
> -			submit(fd, gen, &execbuf, reloc, batches, count);
> +			submit(fd, gen, &execbuf, reloc, batches, count, ahnd, do_relocs);
>  			for (unsigned i = 0; i < count; i++) {
>  				uint64_t offset, delta;
>  
> @@ -189,13 +236,18 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>  		}
>  		shared[child] = cycles;
>  		igt_info("engine[%d]: %llu cycles\n", child, (long long)cycles);
> +		intel_allocator_close(ahnd);
>  	}
>  	igt_waitchildren();
>  
> +	intel_allocator_close(ahnd);
> +	intel_allocator_multiprocess_stop();
> +
>  	for (unsigned i = 0; i < count; i++) {
>  		munmap(batches[i].ptr, BATCH_SIZE);
>  		gem_close(fd, batches[i].handle);
>  	}
> +	free(batches);
>  
>  	shared[nengine] = 0;
>  	for (unsigned i = 0; i < nengine; i++)
> @@ -216,6 +268,7 @@ igt_main
>  		igt_fork_hang_detector(i915);
>  	}
>  
> +
>  	igt_subtest("basic") /* just enough to run a single pass */
>  		fillgtt(i915, ALL_ENGINES, 1);
>  
> -- 
> 2.25.1
> 


More information about the igt-dev mailing list