[PATCH i-g-t 1/1] tests/intel/xe_exec_store: Introduce long-shader tests

Thu Nov 28 13:25:45 UTC 2024

On Mon, Nov 25, 2024 at 02:56:45PM +0100, Dominik Karol Piątkowski wrote:
> Introduce 4 tests:
> - long-shader-bb-sram-target-sram
> - long-shader-bb-sram-target-vram
> - long-shader-bb-vram-target-sram
> - long-shader-bb-vram-target-vram
> 
> These tests are the core version of the xe_eudebug_online@writes-caching* tests.
> 
> Each test writes incrementing values to 2-page-long target surface using
> long shader. The bb is searched for full shader, expecting it to exist.
> The target surface is checked against written values. Each test places
> bb and surface in different configuration of memory regions in order to
> validate memory coherency.
> 
> Signed-off-by: Dominik Karol Piątkowski <dominik.karol.piatkowski at intel.com>
> ---
>  tests/intel/xe_exec_store.c | 130 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 130 insertions(+)
> 
> diff --git a/tests/intel/xe_exec_store.c b/tests/intel/xe_exec_store.c
> index 1375ee906..bb7e7987d 100644
> --- a/tests/intel/xe_exec_store.c
> +++ b/tests/intel/xe_exec_store.c
> @@ -12,6 +12,10 @@
>  #include "xe/xe_query.h"
>  #include "xe_drm.h"
>  
> +#include "intel_pat.h"
> +#include "intel_mocs.h"
> +#include "gpgpu_shader.h"
> +
>  /**
>   * TEST: Tests to verify store dword functionality.
>   * Category: Core
> @@ -334,6 +338,120 @@ static void persistent(int fd)
>  	xe_vm_destroy(fd, vm);
>  }
>  
> +#define LONG_SHADER_VALUE(n)	(0xcafe0000 + (n))
> +
> +/**
> + * SUBTEST: long-shader-bb-%s-target-%s
> + * DESCRIPTION: Write incrementing values to 2-page-long target surface using long shader. Check if
> + *		the bb contains full shader. Check if all written values are in the target surface.
> + *		Place bb and surface in various memory regions to validate memory coherency.
> + *
> + * arg[1]:
> + *
> + * @sram: bb in SRAM
> + * @vram: bb in VRAM
> + *
> + * arg[2]:
> + *
> + * @sram: target surface in SRAM
> + * @vram: target surface in VRAM
> + */
> +static void long_shader(int fd, struct drm_xe_engine_class_instance *hwe,
> +			bool bb_in_vram, bool target_in_vram)
> +{
> +	const uint64_t target_offset = 0x1a000000;
> +	const uint64_t bb_offset = 0x1b000000;
> +	const size_t bb_size = 32768;
> +	uint32_t vm_id;
> +	uint32_t exec_queue;
> +	const unsigned int instruction_count = 128;
> +	const unsigned int walker_dim_x = 4;
> +	const unsigned int walker_dim_y = 8;
> +	const unsigned int surface_dim_x = 64;
> +	const unsigned int surface_dim_y = instruction_count;
> +	struct gpgpu_shader *shader;
> +	struct intel_buf *buf;
> +	struct intel_bb *ibb;
> +	uint32_t *ptr;
> +	uint64_t bb_region;
> +	uint64_t target_region;
> +
> +	if (bb_in_vram || target_in_vram)
> +		igt_skip_on_f(!xe_has_vram(fd), "Device does not have VRAM.\n");
> +
> +	bb_region = bb_in_vram ? vram_memory(fd, hwe->gt_id) : system_memory(fd);
> +	target_region = target_in_vram ? vram_memory(fd, hwe->gt_id) : system_memory(fd);
> +
> +	buf = intel_buf_create_full(buf_ops_create(fd), 0, surface_dim_x / 4, surface_dim_y,
> +				    32, 0, I915_TILING_NONE, 0, 0, 0, target_region,
> +				    DEFAULT_PAT_INDEX, DEFAULT_MOCS_INDEX);
> +	buf->addr.offset = target_offset;
> +
> +	vm_id = xe_vm_create(fd, DRM_XE_VM_CREATE_FLAG_LR_MODE, 0);
> +	exec_queue = xe_exec_queue_create(fd, vm_id, hwe, 0);
> +
> +	ibb = intel_bb_create_with_context_in_region(fd, exec_queue, vm_id, NULL, bb_size, bb_region);
> +	intel_bb_remove_object(ibb, ibb->handle, ibb->batch_offset, ibb->size);
> +	intel_bb_add_object(ibb, ibb->handle, ibb->size, bb_offset, ibb->alignment, false);
> +	ibb->batch_offset = bb_offset;
> +
> +	intel_bb_set_lr_mode(ibb, true);
> +
> +	shader = gpgpu_shader_create(fd);
> +	gpgpu_shader__nop(shader);
> +	for (int i = 0; i < instruction_count; i++)
> +		gpgpu_shader__common_target_write_u32(shader, i, LONG_SHADER_VALUE(i));
> +	gpgpu_shader__nop(shader);
> +	gpgpu_shader__eot(shader);
> +
> +	gpgpu_shader_exec(ibb, buf, walker_dim_x, walker_dim_y, shader, NULL, 0, 0);
> +	intel_bb_sync(ibb);
> +
> +	ptr = xe_bo_map(fd, ibb->handle, ibb->size);
> +	igt_assert_f(memmem(ptr, ibb->size, shader->code, shader->size * sizeof(uint32_t)),
> +		     "Could not find kernel in bb!\n");
> +	gem_munmap(ptr, ibb->size);
> +
> +	gpgpu_shader_destroy(shader);
> +
> +	ptr = xe_bo_map(fd, buf->handle, buf->surface[0].size);
> +	for (int i = 0; i < buf->surface[0].size / 4; i += 16)
> +		for (int j = 0; j < 4; j++)
> +			igt_assert(ptr[i + j] == LONG_SHADER_VALUE(i / 16));
> +	gem_munmap(ptr, buf->surface[0].size);
> +
> +	intel_bb_destroy(ibb);
> +	xe_exec_queue_destroy(fd, exec_queue);
> +	xe_vm_destroy(fd, vm_id);
> +	free(buf);
> +}
> +
> +#define is_compute_on_gt(__e, __gt) (((__e)->engine_class == DRM_XE_ENGINE_CLASS_RENDER || \
> +				      (__e)->engine_class == DRM_XE_ENGINE_CLASS_COMPUTE) && \
> +				      (__e)->gt_id == (__gt))
> +
> +static struct drm_xe_engine_class_instance *pick_compute(int fd, int gt)
> +{
> +	struct drm_xe_engine_class_instance *hwe;
> +	int count = 0;
> +
> +	xe_for_each_engine(fd, hwe)
> +		if (is_compute_on_gt(hwe, gt))
> +			count++;
> +
> +	xe_for_each_engine(fd, hwe)
> +		if (is_compute_on_gt(hwe, gt) && rand() % count-- == 0)
> +			return hwe;
> +
> +	return NULL;
> +}
> +
> +#define test_gt_render_or_compute(t, fd, __hwe) \
> +	igt_subtest_with_dynamic(t) \
> +		for (int gt = 0; (__hwe = pick_compute(fd, gt)); gt++) \
> +			igt_dynamic_f("%s%d", xe_engine_class_string(__hwe->engine_class), \
> +				      hwe->engine_instance)
> +
>  igt_main
>  {
>  	struct drm_xe_engine_class_instance *hwe;
> @@ -378,6 +496,18 @@ igt_main
>  	igt_subtest("persistent")
>  		persistent(fd);
>  
> +	test_gt_render_or_compute("long-shader-bb-sram-target-sram", fd, hwe)
> +		long_shader(fd, hwe, false, false);
> +
> +	test_gt_render_or_compute("long-shader-bb-sram-target-vram", fd, hwe)
> +		long_shader(fd, hwe, false, true);
> +
> +	test_gt_render_or_compute("long-shader-bb-vram-target-sram", fd, hwe)
> +		long_shader(fd, hwe, true, false);
> +
> +	test_gt_render_or_compute("long-shader-bb-vram-target-vram", fd, hwe)
> +		long_shader(fd, hwe, true, true);

I would prefer a single subtest, like 'long-shader-bb-check', with dynamic
subtests that depend on the number of accessible memory regions.
In the calls above, hwe seems to be 'some' engine, i.e. a dangling
engine selected by a previous test. I bet no one will take a look at
your test if some additional test is added in between.

I see you've properly selected the vram accessible to the gt in the test.
I think you should iterate over all accessible memory regions, especially
if there is more than a single gt.

--
Zbigniew

> +
>  	igt_fixture {
>  		xe_device_put(fd);
>  		close(fd);
> -- 
> 2.34.1
>