[PATCH i-g-t 10/11] lib/intel_compute: Use constants for thread groups and local work size
Thomas Hellström
thomas.hellstrom at linux.intel.com
Thu Mar 13 15:09:09 UTC 2025
On Tue, 2025-03-11 at 16:21 +0100, Francois Dugast wrote:
> Define new constants and use them to build the pipeline instead of
> magic values. This also helps homogenize the code to enforce a
> similar execution across GPUs. Having them grouped together in the
> file makes it easier to experiment with different values, as they
> depend on each other but were previously distributed.
>
> Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> ---
> lib/intel_compute.c | 34 ++++++++++++++++++++++------------
> 1 file changed, 22 insertions(+), 12 deletions(-)
>
> diff --git a/lib/intel_compute.c b/lib/intel_compute.c
> index f5b3a88f0..068d64b24 100644
> --- a/lib/intel_compute.c
> +++ b/lib/intel_compute.c
> @@ -55,6 +55,16 @@
>
> #define
> USER_FENCE_VALUE 0xdeadbeefdeadbeefull
>
> +#define THREADS_PER_GROUP 32
> +#define THREAD_GROUP_X MAX(1, SIZE_DATA /
> (ENQUEUED_LOCAL_SIZE_X * \
> +
> ENQUEUED_LOCAL_SIZE_Y * \
> +
> ENQUEUED_LOCAL_SIZE_Z))
> +#define THREAD_GROUP_Y 1
> +#define THREAD_GROUP_Z 1
> +#define ENQUEUED_LOCAL_SIZE_X 1024
> +#define ENQUEUED_LOCAL_SIZE_Y 1
> +#define ENQUEUED_LOCAL_SIZE_Z 1
Nit: Perhaps define the ENQUEUED_LOCAL_SIZE_* macros before the
THREAD_GROUP_* macros that use them, to make the dependency clearer.
Anyway,
Reviewed-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> +
> /*
> * TGP - ThreadGroup Preemption
> * WMTP - Walker Mid Thread Preemption
> @@ -781,9 +791,9 @@ static void xehp_create_indirect_data(uint32_t
> *addr_bo_buffer_batch,
> addr_bo_buffer_batch[b++] = addr_output & 0xffffffff;
> addr_bo_buffer_batch[b++] = addr_output >> 32;
> addr_bo_buffer_batch[b++] = loop_count;
> - addr_bo_buffer_batch[b++] = 0x00000400; // Enqueued local
> size X
> - addr_bo_buffer_batch[b++] = 0x00000001; // Enqueued local
> size Y
> - addr_bo_buffer_batch[b++] = 0x00000001; // Enqueued local
> size Z
> + addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_X;
> + addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_Y;
> + addr_bo_buffer_batch[b++] = ENQUEUED_LOCAL_SIZE_Z;
> addr_bo_buffer_batch[b++] = 0x00000000;
> addr_bo_buffer_batch[b++] = 0x00000000;
> addr_bo_buffer_batch[b++] = 0x00000000;
> @@ -1164,7 +1174,7 @@ static void xehpc_compute_exec_compute(uint32_t
> *addr_bo_buffer_batch,
> addr_bo_buffer_batch[b++] = 0x00180000;
> addr_bo_buffer_batch[b++] = 0x00000000;
> addr_bo_buffer_batch[b++] = 0x00000000;
> - addr_bo_buffer_batch[b++] = 0x0c000020;
> + addr_bo_buffer_batch[b++] = 0x0c000000 | THREADS_PER_GROUP;
>
> addr_bo_buffer_batch[b++] = 0x00000008;
> addr_bo_buffer_batch[b++] = 0x00000000;
> @@ -1332,10 +1342,10 @@ static void
> xelpg_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
> addr_bo_buffer_batch[b++] = 0xbe040000;
> addr_bo_buffer_batch[b++] = 0xffffffff;
> addr_bo_buffer_batch[b++] = 0x000003ff;
> - addr_bo_buffer_batch[b++] = 0x00000001;
> + addr_bo_buffer_batch[b++] = THREAD_GROUP_X;
>
> - addr_bo_buffer_batch[b++] = 0x00000001;
> - addr_bo_buffer_batch[b++] = 0x00000001;
> + addr_bo_buffer_batch[b++] = THREAD_GROUP_Y;
> + addr_bo_buffer_batch[b++] = THREAD_GROUP_Z;
> addr_bo_buffer_batch[b++] = 0x00000000;
> addr_bo_buffer_batch[b++] = 0x00000000;
> addr_bo_buffer_batch[b++] = 0x00000000;
> @@ -1350,7 +1360,7 @@ static void xelpg_compute_exec_compute(uint32_t
> *addr_bo_buffer_batch,
> addr_bo_buffer_batch[b++] = 0x00000000;
> addr_bo_buffer_batch[b++] = 0x00000000;
> addr_bo_buffer_batch[b++] = 0x00001080;
> - addr_bo_buffer_batch[b++] = 0x0c000020;
> + addr_bo_buffer_batch[b++] = 0x0c000000 | THREADS_PER_GROUP;
>
> addr_bo_buffer_batch[b++] = 0x00000008;
> addr_bo_buffer_batch[b++] = 0x00000000;
> @@ -1470,10 +1480,10 @@ static void
> xe2lpg_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
> */
> addr_bo_buffer_batch[b++] = 0x00200000; // Thread
> Group ID X Dimension
> else
> - addr_bo_buffer_batch[b++] = 0x00000002;
> + addr_bo_buffer_batch[b++] = THREAD_GROUP_X;
>
> - addr_bo_buffer_batch[b++] = 0x00000001; // Thread Group ID Y
> Dimension
> - addr_bo_buffer_batch[b++] = 0x00000001; // Thread Group ID Z
> Dimension
> + addr_bo_buffer_batch[b++] = THREAD_GROUP_Y;
> + addr_bo_buffer_batch[b++] = THREAD_GROUP_Z;
> addr_bo_buffer_batch[b++] = 0x00000000;
> addr_bo_buffer_batch[b++] = 0x00000000;
> addr_bo_buffer_batch[b++] = 0x00000000;
> @@ -1494,7 +1504,7 @@ static void
> xe2lpg_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
>
> addr_bo_buffer_batch[b++] = 0x00000000;
> addr_bo_buffer_batch[b++] = 0x00000000;
> - addr_bo_buffer_batch[b++] = 0x0c000020;
> + addr_bo_buffer_batch[b++] = 0x0c000000 | THREADS_PER_GROUP;
> addr_bo_buffer_batch[b++] = 0x00000000;
> addr_bo_buffer_batch[b++] = 0x00000000;
> addr_bo_buffer_batch[b++] = 0x00001047;
More information about the igt-dev mailing list