[igt-dev] [PATCH i-g-t] tests/i915/gem_exec_capture : Add support for local memory

Zbigniew Kempczyński zbigniew.kempczynski at intel.com
Thu Apr 28 19:32:19 UTC 2022


On Mon, Apr 25, 2022 at 10:56:59AM +0530, sai.gowtham.ch at intel.com wrote:
> From: Ch Sai Gowtham <sai.gowtham.ch at intel.com>
> 
> Adding local memory support to many-4K-zero subtest and used
> new macro for_each_memory_region for memory regioning.
> 
> Signed-off-by: Ch Sai Gowtham <sai.gowtham.ch at intel.com>
> Cc: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> ---
>  tests/i915/gem_exec_capture.c | 100 ++++++++++++++++++++--------------
>  1 file changed, 59 insertions(+), 41 deletions(-)
> 
> diff --git a/tests/i915/gem_exec_capture.c b/tests/i915/gem_exec_capture.c
> index 60f8df04..0b6f3ffb 100644
> --- a/tests/i915/gem_exec_capture.c
> +++ b/tests/i915/gem_exec_capture.c
> @@ -250,7 +250,8 @@ static void wait_to_die(int fence_out)
>  
>  static void __capture1(int fd, int dir, uint64_t ahnd, const intel_ctx_t *ctx,
>  		       const struct intel_execution_engine2 *e,
> -		       uint32_t target, uint64_t target_size, uint32_t region)
> +		       uint32_t target, uint64_t target_size,
> +		       struct drm_i915_gem_memory_class_instance *region)
>  {
>  	const unsigned int gen = intel_gen(intel_get_drm_devid(fd));
>  	struct drm_i915_gem_exec_object2 obj[4];
> @@ -268,13 +269,13 @@ static void __capture1(int fd, int dir, uint64_t ahnd, const intel_ctx_t *ctx,
>  	saved_engine = configure_hangs(fd, e, ctx->id);
>  
>  	memset(obj, 0, sizeof(obj));
> -	obj[SCRATCH].handle = gem_create_in_memory_regions(fd, 4096, region);
> +	obj[SCRATCH].handle = gem_create_in_memory_region_list(fd, 4096, region, 1);
>  	obj[SCRATCH].flags = EXEC_OBJECT_WRITE;
>  	obj[CAPTURE].handle = target;
>  	obj[CAPTURE].flags = EXEC_OBJECT_CAPTURE;
>  	obj[NOCAPTURE].handle = gem_create(fd, 4096);
>  
> -	obj[BATCH].handle = gem_create_in_memory_regions(fd, 4096, region);
> +	obj[BATCH].handle = gem_create_in_memory_region_list(fd, 4096, region, 1);
>  	obj[BATCH].relocs_ptr = (uintptr_t)reloc;
>  	obj[BATCH].relocation_count = !ahnd ? ARRAY_SIZE(reloc) : 0;
>  
> @@ -384,12 +385,13 @@ static void __capture1(int fd, int dir, uint64_t ahnd, const intel_ctx_t *ctx,
>  }
>  
>  static void capture(int fd, int dir, const intel_ctx_t *ctx,
> -		    const struct intel_execution_engine2 *e, uint32_t region)
> +		const struct intel_execution_engine2 *e,
> +		struct drm_i915_gem_memory_class_instance *region)
>  {
>  	uint32_t handle;
>  	uint64_t ahnd, obj_size = 4096;
>  
> -	igt_assert_eq(__gem_create_in_memory_regions(fd, &handle, &obj_size, region), 0);
> +	igt_assert_eq(__gem_create_in_memory_region_list(fd, &handle, &obj_size, region, 1), 0);
>  	ahnd = get_reloc_ahnd(fd, ctx->id);
>  
>  	__capture1(fd, dir, ahnd, ctx, e, handle, obj_size, region);
> @@ -415,7 +417,8 @@ static struct offset *
>  __captureN(int fd, int dir, uint64_t ahnd, const intel_ctx_t *ctx,
>  	   const struct intel_execution_engine2 *e,
>  	   unsigned int size, int count,
> -	   unsigned int flags, int *_fence_out)
> +	   unsigned int flags, int *_fence_out,
> +	   struct drm_i915_gem_memory_class_instance *region)
>  #define INCREMENTAL 0x1
>  #define ASYNC 0x2
>  {
> @@ -436,9 +439,10 @@ __captureN(int fd, int dir, uint64_t ahnd, const intel_ctx_t *ctx,
>  	obj = calloc(count + 2, sizeof(*obj));
>  	igt_assert(obj);
>  
> -	obj[0].handle = gem_create(fd, 4096);
> +	obj[0].handle = gem_create_in_memory_region_list(fd, 4096, region, 1);
>  	obj[0].offset = get_offset(ahnd, obj[0].handle, 4096, 0);
> -	obj[0].flags = EXEC_OBJECT_WRITE | (ahnd ? EXEC_OBJECT_PINNED : 0);
> +	obj[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_WRITE |
> +			(ahnd ? EXEC_OBJECT_PINNED : 0);
>  
>  	for (i = 0; i < count; i++) {
>  		obj[i + 1].handle = gem_create(fd, size);

I would expect that object being subject to dump would also reside 
in region memory. This opens can of worms (see below).


> @@ -459,11 +463,12 @@ __captureN(int fd, int dir, uint64_t ahnd, const intel_ctx_t *ctx,
>  		}
>  	}
>  
> -	obj[count + 1].handle = gem_create(fd, 4096);
> +	obj[count + 1].handle = gem_create_in_memory_region_list(fd, 4096, region, 1);
>  	obj[count + 1].relocs_ptr = (uintptr_t)reloc;
>  	obj[count + 1].relocation_count = !ahnd ? ARRAY_SIZE(reloc) : 0;
>  	obj[count + 1].offset = get_offset(ahnd, obj[count + 1].handle, 4096, 0);
> -	obj[count + 1].flags = ahnd ? EXEC_OBJECT_PINNED : 0;
> +	obj[count + 1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
> +				(ahnd ? EXEC_OBJECT_PINNED : 0);
>  
>  	memset(reloc, 0, sizeof(reloc));
>  	reloc[0].target_handle = obj[count + 1].handle; /* recurse */
> @@ -585,14 +590,16 @@ __captureN(int fd, int dir, uint64_t ahnd, const intel_ctx_t *ctx,
>  		saved = configure_hangs(fd, e, ctx->id); \
>  	} while(0)
>  
> -static void many(int fd, int dir, uint64_t size, unsigned int flags)
> +static void many(int fd, int dir, uint64_t size, unsigned int flags,
> +		struct drm_i915_gem_memory_class_instance *region)
>  {
>  	const struct intel_execution_engine2 *e;
>  	const intel_ctx_t *ctx;
> -	uint64_t ram, gtt, ahnd;
> +	uint64_t ram, gtt, ahnd, lmem_size;
>  	unsigned long count, blobs;
>  	struct offset *offsets;
>  	struct gem_engine_properties saved_engine;
> +	struct drm_i915_query_memory_regions *info;
>  
>  	find_first_available_engine(fd, ctx, e, saved_engine);
>  
> @@ -601,13 +608,22 @@ static void many(int fd, int dir, uint64_t size, unsigned int flags)
>  	igt_debug("Available objects in GTT:%"PRIu64", RAM:%"PRIu64"\n",
>  		  gtt, ram);
>  
> -	count = min(gtt, ram) / 4;
> -	igt_require(count > 1);
> +	info = gem_get_query_memory_regions(fd);
> +	lmem_size = gpu_meminfo_region_total_size(info, I915_MEMORY_CLASS_DEVICE) / size;
> +
> +	if (region->memory_class == I915_MEMORY_CLASS_SYSTEM) {
> +		count = min(gtt, ram) / 4;
> +		igt_require(count > 1);
> +		intel_require_memory(count, size, CHECK_RAM);
> +	} else {
> +		count = min(gtt, lmem_size) / 4;
> +		igt_require(count > 1);
> +	}

This is our can of worms. Region size / 4 which should be reasonable
from mathematical point of view is not according to driver / hw requirements.
When you start creating objects in lmem and you're requesting 4K size real
allocation size can be bigger, for DG2 it is 64K. That's why /4 is not correct
and it should be altered. Another problem is that lmem is also occupied by
flatccs + likely gtt, so room for allocations is smaller than we calculate (/4).
The last thing is that even you calculate it correctly test will never end
due to time required to dump lmem bo via cpu mappings. 

At the moment adopt captureN to be aware of object size and do regioning 
in many-4K-incremental (see comment below).

>  
> -	intel_require_memory(count, size, CHECK_RAM);
>  	ahnd = get_reloc_ahnd(fd, ctx->id);
>  
> -	offsets = __captureN(fd, dir, ahnd, ctx, e, size, count, flags, NULL);
> +	offsets = __captureN(fd, dir, ahnd, ctx, e, size, count, flags, NULL,
> +			region);
>  
>  	blobs = check_error_state(dir, offsets, count, size, !!(flags & INCREMENTAL));
>  	igt_info("Captured %lu %"PRId64"-blobs out of a total of %lu\n",
> @@ -620,7 +636,8 @@ static void many(int fd, int dir, uint64_t size, unsigned int flags)
>  }
>  
>  static void prioinv(int fd, int dir, const intel_ctx_t *ctx,
> -		    const struct intel_execution_engine2 *e)
> +		    const struct intel_execution_engine2 *e,
> +		    struct drm_i915_gem_memory_class_instance *region)
>  {
>  	const uint32_t bbe = MI_BATCH_BUFFER_END;
>  	struct drm_i915_gem_exec_object2 obj = {
> @@ -677,7 +694,8 @@ static void prioinv(int fd, int dir, const intel_ctx_t *ctx,
>  		/* Reopen the allocator in the new process. */
>  		ahnd = get_reloc_ahnd(fd, ctx2->id);
>  
> -		free(__captureN(fd, dir, ahnd, ctx2, e, size, count, ASYNC, &fence_out));
> +		free(__captureN(fd, dir, ahnd, ctx2, e, size, count, ASYNC,
> +					&fence_out, region));
>  		put_ahnd(ahnd);
>  
>  		write(link[1], &fd, sizeof(fd)); /* wake the parent up */
> @@ -708,7 +726,8 @@ static void prioinv(int fd, int dir, const intel_ctx_t *ctx,
>  	put_ahnd(ahnd);
>  }
>  
> -static void userptr(int fd, int dir)
> +static void userptr(int fd, int dir,
> +		struct drm_i915_gem_memory_class_instance *region)
>  {
>  	const struct intel_execution_engine2 *e;
>  	const intel_ctx_t *ctx;
> @@ -716,7 +735,7 @@ static void userptr(int fd, int dir)
>  	uint64_t ahnd;
>  	void *ptr;
>  	int obj_size = 4096;
> -	uint32_t system_region = INTEL_MEMORY_REGION_ID(I915_SYSTEM_MEMORY, 0);
> +
>  	struct gem_engine_properties saved_engine;
>  
>  	find_first_available_engine(fd, ctx, e, saved_engine);
> @@ -726,7 +745,8 @@ static void userptr(int fd, int dir)
>  	igt_require(__gem_userptr(fd, ptr, obj_size, 0, 0, &handle) == 0);
>  	ahnd = get_reloc_ahnd(fd, ctx->id);
>  
> -	__capture1(fd, dir, ahnd, ctx, e, handle, obj_size, system_region);
> +	__capture1(fd, dir, ahnd, ctx, e, handle, obj_size,
> +			region);
>  
>  	gem_close(fd, handle);
>  	put_ahnd(ahnd);
> @@ -764,9 +784,10 @@ igt_main
>  	int fd = -1;
>  	int dir = -1;
>  	struct drm_i915_query_memory_regions *query_info;
> -	struct igt_collection *regions, *set;
> -	char *sub_name;
> -	uint32_t region;
> +	struct drm_i915_gem_memory_class_instance region_smem = {
> +		.memory_class = I915_MEMORY_CLASS_SYSTEM,
> +		.memory_instance = 0,
> +	};
>  
>  	igt_fixture {
>  		int gen;
> @@ -788,56 +809,53 @@ igt_main
>  		igt_require(safer_strlen(igt_sysfs_get(dir, "error")) > 0);
>  		query_info = gem_get_query_memory_regions(fd);
>  		igt_assert(query_info);
> -		set = get_memory_region_set(query_info,
> -				I915_SYSTEM_MEMORY,
> -				I915_DEVICE_MEMORY);
>  	}
>  
>  	test_each_engine("capture", fd, ctx, e) {
> -		for_each_combination(regions, 1, set) {
> -			sub_name = memregion_dynamic_subtest_name(regions);
> -			region = igt_collection_get_value(regions, 0);
> -			igt_dynamic_f("%s-%s", e->name, sub_name)
> -				capture(fd, dir, ctx, e, region);
> -			free(sub_name);
> +		for_each_memory_region(r, fd) {
> +			igt_dynamic_f("%s-%s", e->name, r->name)
> +				capture(fd, dir, ctx, e, &r->ci);
>  		}
>  	}
>  
> -	igt_subtest_f("many-4K-zero") {
> +	igt_subtest_with_dynamic("many-4K-zero") {
>  		igt_require(gem_can_store_dword(fd, 0));
> -		many(fd, dir, 1<<12, 0);
> +		for_each_memory_region(r, fd) {
> +			igt_dynamic_f("%s", r->name)
> +				many(fd, dir, 1<<12, 0, &r->ci);
> +		}

If you can just migrate regioning support from many-4K-zero -> many-4K-incremental.
This will create 'error' file more interesting.

Code shape is almost done, we just need to set up test boundaries.

--
Zbigniew

>  	}
>  
>  	igt_subtest_f("many-4K-incremental") {
>  		igt_require(gem_can_store_dword(fd, 0));
> -		many(fd, dir, 1<<12, INCREMENTAL);
> +		many(fd, dir, 1<<12, INCREMENTAL, &region_smem);
>  	}
>  
>  	igt_subtest_f("many-2M-zero") {
>  		igt_require(gem_can_store_dword(fd, 0));
> -		many(fd, dir, 2<<20, 0);
> +		many(fd, dir, 2<<20, 0, &region_smem);
>  	}
>  
>  	igt_subtest_f("many-2M-incremental") {
>  		igt_require(gem_can_store_dword(fd, 0));
> -		many(fd, dir, 2<<20, INCREMENTAL);
> +		many(fd, dir, 2<<20, INCREMENTAL, &region_smem);
>  	}
>  
>  	igt_subtest_f("many-256M-incremental") {
>  		igt_require(gem_can_store_dword(fd, 0));
> -		many(fd, dir, 256<<20, INCREMENTAL);
> +		many(fd, dir, 256<<20, INCREMENTAL, &region_smem);
>  	}
>  
>  	/* And check we can read from different types of objects */
>  
>  	igt_subtest_f("userptr") {
>  		igt_require(gem_can_store_dword(fd, 0));
> -		userptr(fd, dir);
> +		userptr(fd, dir, &region_smem);
>  	}
>  
>  	test_each_engine("pi", fd, ctx, e)
>  		igt_dynamic_f("%s", (e)->name)
> -			prioinv(fd, dir, ctx, e);
> +			prioinv(fd, dir, ctx, e, &region_smem);
>  
>  	igt_fixture {
>  		close(dir);
> -- 
> 2.35.1
> 


More information about the igt-dev mailing list