[PATCH i-g-t] lib/intel_compute: Support testing multiple compute kernels

Sat Jan 27 13:44:07 UTC 2024

Hi Zbigniew,

Thanks for your reply and for providing a detailed explanation.

I have indeed found this change is insufficient and it's hard to support 
testing multiple compute kernels with different number of arguments 
within the current framework. The checking of testing results of compute 
kernels is also hardcoded. I also think it may be unnecessary to test a 
variety of compute kernels.

Best Regards,

Min

On Fri, Jan 26, 2024 at 3:26PM, Zbigniew Kempczyński wrote:
> On Thu, Jan 18, 2024 at 08:12:18PM +0800, Min Zhou wrote:
>> It seems that we will add more and more compute kernels for testing.
>> The function name of `run_intel_compute_kernel` seems to be able to
>> test multiple compute kernels. However it's hard to test other
>> compute kernel in testcases because the compute kernel is hardcoded
>> in the lib/intel_compute.c. So if we want to test multiple compute
>> kernels in testcases in the future, it's better to support it in
>> lib/intel_compute.
> I'm sorry for late answer.
>
> For TL;DR jump to the bottom.
>
> Regarding commit messagee - it's partially true.
> run_intel_compute_kernel() is able to run only kernels which arguments
> passed to the shader are same, I mean:
>
> opencl/compute_square_kernel.cl:
>
> __kernel void square(__global float* input, __global float* output, const unsigned int count) {
>    int i = get_global_id(0);
>    if(i < count)
>      output[i] = input[i] * input[i];
> }
>
> has direct reflection to binding table elements - see
> xehp_create_surface_state(), binding table is at 0x00001080 and
> contains
>
> 	addr_bo_buffer_batch[b++] = 0x00001000;
> 	addr_bo_buffer_batch[b++] = 0x00001040;
> 	addr_bo_buffer_batch[b++] = 0x00000000;
>
> what points to input and output data. To support different kernels
> we should generate pipelines more generic. Current code is reversed
> from compute-runtime (neo) what narrows it to use shaders with known
> in advance number of arguments.
>
> Additionally extracting shader from elf files (ocloc produces shader
> which is packed to elf) varies - for different platforms you may notice
> different sections and file arragement. If I recall correctly for some
> shaders compiled there's some prologue omitted in shader hex form packed
> to the code (I'm not sure what is for, but keeping it hangs the engine).
> That prologue is also omitted by compute-runtime by shifting kernel
> start address after this prologue.
>
> TL;DR
> -----
> This refactor doesn't support shaders with different number of
> arguments than current compute square. In IGT we just need to
> run simple compute workflow to verify submission to compute
> engine. Current code shape was made by folks in intention to
> be extendible, but mimicing of compute-runtime is hard and
> I'm not sure needed in IGT.
>
> --
> Zbigniew
>
>
>> Signed-off-by: Min Zhou <zhoumin at loongson.cn>
>> ---
>>   lib/intel_compute.c              | 66 +++++++++++++++++++++++++++-----
>>   lib/intel_compute.h              | 13 +++++--
>>   opencl/README                    |  6 +--
>>   tests/intel/gem_compute.c        |  3 +-
>>   tests/intel/xe_compute.c         |  9 +++--
>>   tests/intel/xe_compute_preempt.c |  3 +-
>>   6 files changed, 78 insertions(+), 22 deletions(-)
>>
>> diff --git a/lib/intel_compute.c b/lib/intel_compute.c
>> index eab407a0d..9c21c10c5 100644
>> --- a/lib/intel_compute.c
>> +++ b/lib/intel_compute.c
>> @@ -64,6 +64,30 @@ struct bo_execenv {
>>   	struct drm_i915_gem_exec_object2 *obj;
>>   };
>>   
>> +/*
>> + * Supported compute kernels
>> + */
>> +struct {
>> +	const char *name;
>> +	const struct intel_compute_kernels *kernels;
>> +} intel_compute_kernels_set[] = {
>> +	{ .name = COMPUTE_SQUARE,
>> +	  .kernels = intel_compute_square_kernels },
>> +	{}
>> +};
>> +
>> +static const struct intel_compute_kernels *find_intel_compute_kernels(const char *name)
>> +{
>> +	int i = 0;
>> +
>> +	for (; intel_compute_kernels_set[i].name; ++i) {
>> +		if (strcmp(intel_compute_kernels_set[i].name, name) == 0)
>> +			return intel_compute_kernels_set[i].kernels;
>> +	}
>> +
>> +	return NULL;
>> +}
>> +
>>   static void bo_execenv_create(int fd, struct bo_execenv *execenv,
>>   			      struct drm_xe_engine_class_instance *eci)
>>   {
>> @@ -1435,11 +1459,11 @@ static const struct {
>>   };
>>   
>>   static bool __run_intel_compute_kernel(int fd,
>> -				       struct drm_xe_engine_class_instance *eci)
>> +				       struct drm_xe_engine_class_instance *eci,
>> +				       const struct intel_compute_kernels *kernels)
>>   {
>>   	unsigned int ip_ver = intel_graphics_ver(intel_get_drm_devid(fd));
>>   	unsigned int batch;
>> -	const struct intel_compute_kernels *kernels = intel_compute_square_kernels;
>>   	enum intel_driver driver = get_intel_driver(fd);
>>   
>>   	for (batch = 0; batch < ARRAY_SIZE(intel_compute_batches); batch++) {
>> @@ -1472,9 +1496,16 @@ static bool __run_intel_compute_kernel(int fd,
>>   	return true;
>>   }
>>   
>> -bool run_intel_compute_kernel(int fd)
>> +bool run_intel_compute_kernel(int fd, const char *kernel_name)
>>   {
>> -	return __run_intel_compute_kernel(fd, NULL);
>> +	const struct intel_compute_kernels *kernels;
>> +
>> +	if ((kernels = find_intel_compute_kernels(kernel_name)) == NULL) {
>> +		igt_debug("Compute kernels not found for \"%s\"\n", kernel_name);
>> +		return false;
>> +	}
>> +
>> +	return __run_intel_compute_kernel(fd, NULL, kernels);
>>   }
>>   
>>   /**
>> @@ -1487,8 +1518,11 @@ bool run_intel_compute_kernel(int fd)
>>    * Returns true on success, false otherwise.
>>    */
>>   bool xe_run_intel_compute_kernel_on_engine(int fd,
>> -					   struct drm_xe_engine_class_instance *eci)
>> +					   struct drm_xe_engine_class_instance *eci,
>> +					   const char *kernel_name)
>>   {
>> +	const struct intel_compute_kernels *kernels;
>> +
>>   	if (!is_xe_device(fd)) {
>>   		igt_debug("Xe device expected\n");
>>   		return false;
>> @@ -1506,7 +1540,12 @@ bool xe_run_intel_compute_kernel_on_engine(int fd,
>>   		return false;
>>   	}
>>   
>> -	return __run_intel_compute_kernel(fd, eci);
>> +	if ((kernels = find_intel_compute_kernels(kernel_name)) == NULL) {
>> +		igt_debug("Compute kernels not found for \"%s\"\n", kernel_name);
>> +		return false;
>> +	}
>> +
>> +	return __run_intel_compute_kernel(fd, eci, kernels);
>>   }
>>   
>>   /**
>> @@ -1683,11 +1722,11 @@ static const struct {
>>   	},
>>   };
>>   
>> -static bool __run_intel_compute_kernel_preempt(int fd)
>> +static bool __run_intel_compute_kernel_preempt(int fd,
>> +				const struct intel_compute_kernels *kernels)
>>   {
>>   	unsigned int ip_ver = intel_graphics_ver(intel_get_drm_devid(fd));
>>   	unsigned int batch;
>> -	const struct intel_compute_kernels *kernels = intel_compute_square_kernels;
>>   	enum intel_driver driver = get_intel_driver(fd);
>>   
>>   	for (batch = 0; batch < ARRAY_SIZE(intel_compute_preempt_batches); batch++)
>> @@ -1732,7 +1771,14 @@ static bool __run_intel_compute_kernel_preempt(int fd)
>>    *
>>    * Returns true on success, false otherwise.
>>    */
>> -bool run_intel_compute_kernel_preempt(int fd)
>> +bool run_intel_compute_kernel_preempt(int fd, const char *kernel_name)
>>   {
>> -	return __run_intel_compute_kernel_preempt(fd);
>> +	const struct intel_compute_kernels *kernels;
>> +
>> +	if ((kernels = find_intel_compute_kernels(kernel_name)) == NULL) {
>> +		igt_debug("Compute kernels not found for \"%s\"\n", kernel_name);
>> +		return false;
>> +	}
>> +
>> +	return __run_intel_compute_kernel_preempt(fd, kernels);
>>   }
>> diff --git a/lib/intel_compute.h b/lib/intel_compute.h
>> index bba8bed94..9faf070b3 100644
>> --- a/lib/intel_compute.h
>> +++ b/lib/intel_compute.h
>> @@ -11,6 +11,11 @@
>>   
>>   #include "xe_drm.h"
>>   
>> +/*
>> + * Supported compute kernels name
>> + */
>> +#define COMPUTE_SQUARE	"compute-square"
>> +
>>   /*
>>    * OpenCL Kernels are generated using:
>>    *
>> @@ -33,7 +38,9 @@ struct intel_compute_kernels {
>>   
>>   extern const struct intel_compute_kernels intel_compute_square_kernels[];
>>   
>> -bool run_intel_compute_kernel(int fd);
>> -bool xe_run_intel_compute_kernel_on_engine(int fd, struct drm_xe_engine_class_instance *eci);
>> -bool run_intel_compute_kernel_preempt(int fd);
>> +bool run_intel_compute_kernel(int fd, const char *kernel_name);
>> +bool xe_run_intel_compute_kernel_on_engine(int fd,
>> +					   struct drm_xe_engine_class_instance *eci,
>> +					   const char *kernel_name);
>> +bool run_intel_compute_kernel_preempt(int fd, const char *kernel_name);
>>   #endif	/* INTEL_COMPUTE_H */
>> diff --git a/opencl/README b/opencl/README
>> index 2fd0687a2..4dfbe2865 100644
>> --- a/opencl/README
>> +++ b/opencl/README
>> @@ -5,10 +5,10 @@ multiple platforms.
>>   For instance, to generate compute square Kernel binaries for TGL and ADL
>>   variants, use this:
>>   
>> -    opencl/gen_opencl_kernel xe_compute_square opencl/compute_square_kernel.cl \
>> -	   xe_compute_square_kernels.c build/opencl tgllp adl-s adl-p adl-n
>> +    opencl/gen_opencl_kernel intel_compute_square opencl/compute_square_kernel.cl \
>> +	   intel_compute_square_kernels.c build/opencl tgllp adl-s adl-p adl-n
>>   
>> -    cp build/opencl/xe_compute_square_kernels.c lib/xe/
>> +    cp build/opencl/intel_compute_square_kernels.c lib/
>>   
>>   The opencl/gen_opencl_kernel requires the Intel compute runtime[1].
>>   
>> diff --git a/tests/intel/gem_compute.c b/tests/intel/gem_compute.c
>> index 8d0214c4d..ce368d2c3 100644
>> --- a/tests/intel/gem_compute.c
>> +++ b/tests/intel/gem_compute.c
>> @@ -27,7 +27,8 @@
>>   static void
>>   test_compute_square(int fd)
>>   {
>> -	igt_require_f(run_intel_compute_kernel(fd), "GPU not supported\n");
>> +	igt_require_f(run_intel_compute_kernel(fd, COMPUTE_SQUARE),
>> +		      "GPU not supported\n");
>>   }
>>   
>>   igt_main
>> diff --git a/tests/intel/xe_compute.c b/tests/intel/xe_compute.c
>> index 42f42ca0c..bc81dc04f 100644
>> --- a/tests/intel/xe_compute.c
>> +++ b/tests/intel/xe_compute.c
>> @@ -114,7 +114,7 @@ test_ccs_mode(int num_gt)
>>    * Functionality: CCS mode funtionality
>>    */
>>   static void
>> -test_compute_kernel_with_ccs_mode(int num_gt)
>> +test_compute_kernel_with_ccs_mode(int num_gt, const char *kernel_name)
>>   {
>>   	struct drm_xe_engine_class_instance *hwe;
>>   	u32 gt, m, num_slices;
>> @@ -139,7 +139,7 @@ test_compute_kernel_with_ccs_mode(int num_gt)
>>   
>>   				igt_info("GT-%d: Running compute kernel with ccs_mode %d on ccs engine %d\n",
>>   					 gt, m, hwe->engine_instance);
>> -				igt_assert_f(xe_run_intel_compute_kernel_on_engine(fd, hwe),
>> +				igt_assert_f(xe_run_intel_compute_kernel_on_engine(fd, hwe, kernel_name),
>>   					     "Unable to run compute kernel successfully\n");
>>   			}
>>   			drm_close_driver(fd);
>> @@ -163,7 +163,8 @@ test_compute_kernel_with_ccs_mode(int num_gt)
>>   static void
>>   test_compute_square(int fd)
>>   {
>> -	igt_require_f(run_intel_compute_kernel(fd), "GPU not supported\n");
>> +	igt_require_f(run_intel_compute_kernel(fd, COMPUTE_SQUARE),
>> +		      "GPU not supported\n");
>>   }
>>   
>>   igt_main
>> @@ -186,5 +187,5 @@ igt_main
>>   		test_ccs_mode(num_gt);
>>   
>>   	igt_subtest("ccs-mode-compute-kernel")
>> -		test_compute_kernel_with_ccs_mode(num_gt);
>> +		test_compute_kernel_with_ccs_mode(num_gt, COMPUTE_SQUARE);
>>   }
>> diff --git a/tests/intel/xe_compute_preempt.c b/tests/intel/xe_compute_preempt.c
>> index 31703638e..e4adefd2a 100644
>> --- a/tests/intel/xe_compute_preempt.c
>> +++ b/tests/intel/xe_compute_preempt.c
>> @@ -26,7 +26,8 @@
>>   static void
>>   test_compute_preempt(int fd)
>>   {
>> -	igt_require_f(run_intel_compute_kernel_preempt(fd), "GPU not supported\n");
>> +	igt_require_f(run_intel_compute_kernel_preempt(fd, COMPUTE_SQUARE),
>> +		      "GPU not supported\n");
>>   }
>>   
>>   igt_main
>> -- 
>> 2.39.3
>>