[igt-dev] [PATCH i-g-t v5 35/65] tests/gem_softpin: Exercise eviction with softpinning

Tue Aug 10 05:42:31 UTC 2021

On Mon, Aug 09, 2021 at 03:05:03PM +0200, Zbigniew Kempczyński wrote:
> From: Andrzej Turko <andrzej.turko at linux.intel.com>
> 
> Exercise eviction of many gem objects. The added tests are analogous
> to gem_exec_gttfill, but they use softpin and do not require relocation
> support.
> 
> Signed-off-by: Andrzej Turko <andrzej.turko at linux.intel.com>
> Cc: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> Cc: Petri Latvala <petri.latvala at intel.com>
> Cc: Ashutosh Dixit <ashutosh.dixit at intel.com>
> ---
>  tests/i915/gem_softpin.c | 213 ++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 212 insertions(+), 1 deletion(-)
> 
> diff --git a/tests/i915/gem_softpin.c b/tests/i915/gem_softpin.c
> index 5e47a0ce3..2c9c3320e 100644
> --- a/tests/i915/gem_softpin.c
> +++ b/tests/i915/gem_softpin.c
> @@ -29,6 +29,7 @@
>  #include "i915/gem.h"
>  #include "i915/gem_create.h"
>  #include "igt.h"
> +#include "igt_rand.h"
>  #include "intel_allocator.h"
>  
>  #define EXEC_OBJECT_PINNED	(1<<4)
> @@ -878,9 +879,209 @@ static void test_allocator_fork(int fd)
>  	intel_allocator_multiprocess_stop();
>  }
>  
> +#define BATCH_SIZE (4096<<10)
> +/* We don't have alignment detection yet, so assume the worst-case scenario. */
> +#define BATCH_ALIGNMENT (1 << 21)
> +
> +struct batch {
> +	uint32_t handle;
> +	void *ptr;
> +};
> +
> +static void xchg_batch(void *array, unsigned int i, unsigned int j)
> +{
> +	struct batch *batches = array;
> +	struct batch tmp;
> +
> +	tmp = batches[i];
> +	batches[i] = batches[j];
> +	batches[j] = tmp;
> +}
> +
> +static void submit(int fd, int gen,
> +		   struct drm_i915_gem_execbuffer2 *eb,
> +		   struct batch *batches, unsigned int count,
> +		   uint64_t ahnd)
> +{
> +	struct drm_i915_gem_exec_object2 obj;
> +	uint32_t batch[16];
> +	uint64_t address;
> +	unsigned n;
> +
> +	memset(&obj, 0, sizeof(obj));
> +	obj.flags = EXEC_OBJECT_PINNED;
> +
> +	for (unsigned i = 0; i < count; i++) {
> +		obj.handle = batches[i].handle;
> +		obj.offset = intel_allocator_alloc(ahnd, obj.handle,
> +						   BATCH_SIZE,
> +						   BATCH_ALIGNMENT);
> +		address = obj.offset + BATCH_SIZE - eb->batch_start_offset - 8;
> +		n = 0;
> +		batch[n] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> +		if (gen >= 8) {
> +			batch[n] |= 1 << 21;
> +			batch[n]++;
> +			batch[++n] = address;
> +			batch[++n] = address >> 32;
> +		} else if (gen >= 4) {
> +			batch[++n] = 0;
> +			batch[++n] = address;
> +		} else {
> +			batch[n]--;
> +			batch[++n] = address;
> +		}
> +		batch[++n] = obj.offset; /* lower_32_bits(value) */
> +		batch[++n] = obj.offset >> 32; /* upper_32_bits(value) / nop */
> +		batch[++n] = MI_BATCH_BUFFER_END;
> +		eb->buffers_ptr = to_user_pointer(&obj);
> +
> +		memcpy(batches[i].ptr + eb->batch_start_offset,
> +		       batch, sizeof(batch));
> +
> +		gem_execbuf(fd, eb);
> +	}
> +	/* As we have been lying about the write_domain, we need to do a sync */
> +	gem_sync(fd, obj.handle);
> +}
> +
> +static void test_allocator_evict(int fd, const intel_ctx_t *ctx,
> +				 unsigned ring, int timeout)
> +{
> +	const unsigned int gen = intel_gen(intel_get_drm_devid(fd));
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	unsigned engines[I915_EXEC_RING_MASK + 1];
> +	volatile uint64_t *shared;
> +	struct timespec tv = {};
> +	struct batch *batches;
> +	unsigned nengine;
> +	unsigned count;
> +	uint64_t size, ahnd;
> +
> +	shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> +	igt_assert(shared != MAP_FAILED);
> +
> +	nengine = 0;
> +	if (ring == ALL_ENGINES) {
> +		struct intel_execution_engine2 *e;
> +
> +		for_each_ctx_engine(fd, ctx, e) {
> +			if (!gem_class_can_store_dword(fd, e->class))
> +				continue;
> +
> +			engines[nengine++] = e->flags;
> +		}
> +	} else {
> +		engines[nengine++] = ring;
> +	}
> +	igt_require(nengine);
> +	igt_assert(nengine * 64 <= BATCH_SIZE);
> +
> +	size = gem_aperture_size(fd);
> +	if (!gem_uses_full_ppgtt(fd))
> +		size /= 2;
> +	if (size > 1ull<<32) /* Limit to 4GiB as we do not use allow-48b */
> +		size = 1ull << 32;
> +	igt_require(size < (1ull<<32) * BATCH_SIZE);
> +
> +	count = size / BATCH_SIZE + 1;
> +	igt_debug("Using %'d batches to fill %'llu aperture on %d engines\n",
> +		  count, (long long)size, nengine);
> +
> +	intel_allocator_multiprocess_start();
> +	/* Avoid allocating on the last page */
> +	ahnd = intel_allocator_open_full(fd, 0, 0, size - 4096,
> +					 INTEL_ALLOCATOR_RELOC,
> +					 ALLOC_STRATEGY_HIGH_TO_LOW);

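I forgot - the RELOC allocator currently doesn't support the high-to-low
strategy, so you should pass ALLOC_STRATEGY_LOW_TO_HIGH or ALLOC_STRATEGY_NONE
instead of ALLOC_STRATEGY_HIGH_TO_LOW here. The same applies to the second
intel_allocator_open_full() call in the forked child below.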

--
Zbigniew

> +
> +	intel_require_memory(count, BATCH_SIZE, CHECK_RAM);
> +	intel_detect_and_clear_missed_interrupts(fd);
> +
> +	igt_nsec_elapsed(&tv);
> +
> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffer_count = 1;
> +	if (gen < 6)
> +		execbuf.flags |= I915_EXEC_SECURE;
> +
> +	batches = calloc(count, sizeof(*batches));
> +	igt_assert(batches);
> +	for (unsigned i = 0; i < count; i++) {
> +		batches[i].handle = gem_create(fd, BATCH_SIZE);
> +		batches[i].ptr =
> +			gem_mmap__device_coherent(fd, batches[i].handle,
> +						  0, BATCH_SIZE, PROT_WRITE);
> +	}
> +
> +	/* Flush all memory before we start the timer */
> +	submit(fd, gen, &execbuf, batches, count, ahnd);
> +
> +	igt_info("Setup %u batches in %.2fms\n",
> +		 count, 1e-6 * igt_nsec_elapsed(&tv));
> +
> +	igt_fork(child, nengine) {
> +		uint64_t dst, src, dst_offset, src_offset;
> +		uint64_t cycles = 0;
> +
> +		hars_petruska_f54_1_random_perturb(child);
> +		igt_permute_array(batches, count, xchg_batch);
> +		execbuf.batch_start_offset = child * 64;
> +		execbuf.flags |= engines[child];
> +
> +		dst_offset = BATCH_SIZE - child*64 - 8;
> +		if (gen >= 8)
> +			src_offset = child*64 + 3*sizeof(uint32_t);
> +		else if (gen >= 4)
> +			src_offset = child*64 + 4*sizeof(uint32_t);
> +		else
> +			src_offset = child*64 + 2*sizeof(uint32_t);
> +
> +		/* We need to open the allocator again in the new process */
> +		ahnd = intel_allocator_open_full(fd, 0, 0, size - 4096,
> +						 INTEL_ALLOCATOR_RELOC,
> +						 ALLOC_STRATEGY_HIGH_TO_LOW);
> +
> +		igt_until_timeout(timeout) {
> +			submit(fd, gen, &execbuf, batches, count, ahnd);
> +			for (unsigned i = 0; i < count; i++) {
> +				dst = *(uint64_t *)(batches[i].ptr + dst_offset);
> +				src = *(uint64_t *)(batches[i].ptr + src_offset);
> +				igt_assert_eq_u64(dst, src);
> +			}
> +			cycles++;
> +		}
> +		shared[child] = cycles;
> +		igt_info("engine[%d]: %llu cycles\n", child, (long long)cycles);
> +		intel_allocator_close(ahnd);
> +	}
> +	igt_waitchildren();
> +
> +	intel_allocator_close(ahnd);
> +	intel_allocator_multiprocess_stop();
> +
> +	for (unsigned i = 0; i < count; i++) {
> +		munmap(batches[i].ptr, BATCH_SIZE);
> +		gem_close(fd, batches[i].handle);
> +	}
> +	free(batches);
> +
> +	shared[nengine] = 0;
> +	for (unsigned i = 0; i < nengine; i++)
> +		shared[nengine] += shared[i];
> +	igt_info("Total: %llu cycles\n", (long long)shared[nengine]);
> +
> +	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
> +}
> +
> +#define test_each_engine(T, i915, ctx, e) \
> +	igt_subtest_with_dynamic(T) for_each_ctx_engine(i915, ctx, e) \
> +		igt_dynamic_f("%s", e->name)
> +
>  igt_main
>  {
> +	const struct intel_execution_engine2 *e;
>  	int fd = -1;
> +	const intel_ctx_t *ctx;
>  
>  	igt_fixture {
>  		fd = drm_open_driver_master(DRIVER_INTEL);
> @@ -888,6 +1089,8 @@ igt_main
>  		gem_require_blitter(fd);
>  		igt_require(gem_has_softpin(fd));
>  		igt_require(gem_can_store_dword(fd, 0));
> +
> +		ctx = intel_ctx_create_all_physical(fd);
>  	}
>  
>  	igt_subtest("invalid")
> @@ -923,6 +1126,12 @@ igt_main
>  
>  		igt_subtest("allocator-fork")
>  			test_allocator_fork(fd);
> +
> +		test_each_engine("allocator-evict", fd, ctx, e)
> +			test_allocator_evict(fd, ctx, e->flags, 20);
> +
> +		igt_subtest("allocator-evict-all-engines")
> +			test_allocator_evict(fd, ctx, ALL_ENGINES, 20);
>  	}
>  
>  	igt_subtest("softpin")
> @@ -950,6 +1159,8 @@ igt_main
>  	igt_subtest("evict-hang")
>  		test_evict_hang(fd);
>  
> -	igt_fixture
> +	igt_fixture {
> +		intel_ctx_destroy(fd, ctx);
>  		close(fd);
> +	}
>  }
> -- 
> 2.26.0
>