[PATCH i-g-t, v3 10/11] lib/intel_compute: Use constants for thread groups and local work size

Zbigniew Kempczyński zbigniew.kempczynski at intel.com
Tue Apr 15 09:57:37 UTC 2025


On Mon, Apr 14, 2025 at 06:39:05PM +0200, Francois Dugast wrote:
> Define new constants and use them to build the pipeline instead of
> magic values. This also helps homogenize the code to enforce a
> similar execution across GPUs. Having them grouped together in the
> file makes it easier to experiment with different values, as they
> depend on each other but were previously distributed.
> 
> Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> ---
>  lib/intel_compute.c | 71 ++++++++++++++++++++++++++-------------------
>  1 file changed, 41 insertions(+), 30 deletions(-)
> 
> diff --git a/lib/intel_compute.c b/lib/intel_compute.c
> index 6b779c340..d4609da15 100644
> --- a/lib/intel_compute.c
> +++ b/lib/intel_compute.c
> @@ -55,6 +55,17 @@
>  
>  #define USER_FENCE_VALUE			0xdeadbeefdeadbeefull
>  #define MAGIC_LOOP_STOP			0x12341234
> +
> +#define THREADS_PER_GROUP		32
> +#define THREAD_GROUP_X			MAX(1, SIZE_DATA / (ENQUEUED_LOCAL_SIZE_X * \
> +							    ENQUEUED_LOCAL_SIZE_Y * \
> +							    ENQUEUED_LOCAL_SIZE_Z))
> +#define THREAD_GROUP_Y			1
> +#define THREAD_GROUP_Z			1
> +#define ENQUEUED_LOCAL_SIZE_X		1024
> +#define ENQUEUED_LOCAL_SIZE_Y		1
> +#define ENQUEUED_LOCAL_SIZE_Z		1
> +

Functionally equivalent and more descriptive:

Reviewed-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>

--
Zbigniew
>  /*
>   * TGP  - ThreadGroup Preemption
>   * WMTP - Walker Mid Thread Preemption
> @@ -784,9 +795,9 @@ static void xehp_create_indirect_data(uint32_t *addr_bo_buffer_batch,
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
> -	addr_bo_buffer_batch[b++] = 0x00000400;
> -	addr_bo_buffer_batch[b++] = 0x00000001;
> -	addr_bo_buffer_batch[b++] = 0x00000001;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_X;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_Y;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_Z;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
> @@ -1061,9 +1072,9 @@ static void xehpc_create_indirect_data(uint32_t *addr_bo_buffer_batch,
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
> -	addr_bo_buffer_batch[b++] = 0x00000400;
> -	addr_bo_buffer_batch[b++] = 0x00000001;
> -	addr_bo_buffer_batch[b++] = 0x00000001;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_X;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_Y;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_Z;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = addr_input & 0xffffffff;
> @@ -1071,9 +1082,9 @@ static void xehpc_create_indirect_data(uint32_t *addr_bo_buffer_batch,
>  	addr_bo_buffer_batch[b++] = addr_output & 0xffffffff;
>  	addr_bo_buffer_batch[b++] = addr_output >> 32;
>  	addr_bo_buffer_batch[b++] = loop_count;
> -	addr_bo_buffer_batch[b++] = 0x00000400;
> -	addr_bo_buffer_batch[b++] = 0x00000001;
> -	addr_bo_buffer_batch[b++] = 0x00000001;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_X;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_Y;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_Z;
>  }
>  
>  static void xehpc_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
> @@ -1164,7 +1175,7 @@ static void xehpc_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
>  	addr_bo_buffer_batch[b++] = 0x00180000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
> -	addr_bo_buffer_batch[b++] = 0x0c000020;
> +	addr_bo_buffer_batch[b++] = 0x0c000000 | THREADS_PER_GROUP;
>  
>  	addr_bo_buffer_batch[b++] = 0x00000008;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
> @@ -1275,9 +1286,9 @@ static void xelpg_create_indirect_data(uint32_t *addr_bo_buffer_batch,
>  	addr_bo_buffer_batch[b++] = addr_output & 0xffffffff;
>  	addr_bo_buffer_batch[b++] = addr_output >> 32;
>  	addr_bo_buffer_batch[b++] = loop_count;
> -	addr_bo_buffer_batch[b++] = 0x00000400; // Enqueued local size X
> -	addr_bo_buffer_batch[b++] = 0x00000001; // Enqueued local size Y
> -	addr_bo_buffer_batch[b++] = 0x00000001; // Enqueued local size Z
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_X;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_Y;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_Z;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
> @@ -1357,10 +1368,10 @@ static void xelpg_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
>  	addr_bo_buffer_batch[b++] = 0xbe040000;
>  	addr_bo_buffer_batch[b++] = 0xffffffff;
>  	addr_bo_buffer_batch[b++] = 0x000003ff;
> -	addr_bo_buffer_batch[b++] = 0x00000001;
> +	addr_bo_buffer_batch[b++] = THREAD_GROUP_X;
>  
> -	addr_bo_buffer_batch[b++] = 0x00000001;
> -	addr_bo_buffer_batch[b++] = 0x00000001;
> +	addr_bo_buffer_batch[b++] = THREAD_GROUP_Y;
> +	addr_bo_buffer_batch[b++] = THREAD_GROUP_Z;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
> @@ -1375,7 +1386,7 @@ static void xelpg_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00010080;
> -	addr_bo_buffer_batch[b++] = 0x0c000020;
> +	addr_bo_buffer_batch[b++] = 0x0c000000 | THREADS_PER_GROUP;
>  
>  	addr_bo_buffer_batch[b++] = 0x00000008;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
> @@ -1388,9 +1399,9 @@ static void xelpg_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
>  
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
> -	addr_bo_buffer_batch[b++] = 0x00000400;
> -	addr_bo_buffer_batch[b++] = 0x00000001;
> -	addr_bo_buffer_batch[b++] = 0x00000001;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_X;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_Y;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_Z;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  
> @@ -1495,10 +1506,10 @@ static void xe2lpg_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
>  		 */
>  		addr_bo_buffer_batch[b++] = 0x00200000; // Thread Group ID X Dimension
>  	else
> -		addr_bo_buffer_batch[b++] = 0x00000002;
> +		addr_bo_buffer_batch[b++] = THREAD_GROUP_X;
>  
> -	addr_bo_buffer_batch[b++] = 0x00000001; // Thread Group ID Y Dimension
> -	addr_bo_buffer_batch[b++] = 0x00000001; // Thread Group ID Z Dimension
> +	addr_bo_buffer_batch[b++] = THREAD_GROUP_Y;
> +	addr_bo_buffer_batch[b++] = THREAD_GROUP_Z;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
> @@ -1519,7 +1530,7 @@ static void xe2lpg_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
>  
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
> -	addr_bo_buffer_batch[b++] = 0x0c000020;
> +	addr_bo_buffer_batch[b++] = 0x0c000000 | THREADS_PER_GROUP;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00001047;
> @@ -1530,9 +1541,9 @@ static void xe2lpg_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
> -	addr_bo_buffer_batch[b++] = 0x00000400;
> -	addr_bo_buffer_batch[b++] = 0x00000001;
> -	addr_bo_buffer_batch[b++] = 0x00000001;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_X;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_Y;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_Z;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  
> @@ -1554,9 +1565,9 @@ static void xe2_create_indirect_data_inc_kernel(uint32_t *addr_bo_buffer_batch,
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
> -	addr_bo_buffer_batch[b++] = 0x00000400;
> -	addr_bo_buffer_batch[b++] = 0x00000001;
> -	addr_bo_buffer_batch[b++] = 0x00000001;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_X;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_Y;
> +	addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_Z;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
>  	addr_bo_buffer_batch[b++] = 0x00000000;
> -- 
> 2.43.0
> 


More information about the igt-dev mailing list