[PATCH i-g-t 1/2] lib: Add variable registers per thread (VRT) setup

Manszewski, Christoph christoph.manszewski at intel.com
Tue Mar 4 11:36:31 UTC 2025


Hi Dominik,

On 26.02.2025 11:44, Dominik Grzegorzek wrote:
> For eu debug testing purposes we need to utilize all possible threads per
> eu. This is possible only if we limit the number of GRFs per single thread.
> Add a gpgpu_shader interface which allows us to set that up during pipeline
> creation. For now define only one mode, with 96 GRFs.
> 
> Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>

Reviewed-by: Christoph Manszewski <christoph.manszewski at intel.com>

> ---
>   lib/gpgpu_fill.c            |  2 +-
>   lib/gpgpu_shader.c          | 30 +++++++++++++++++++++++++++---
>   lib/gpgpu_shader.h          |  8 ++++++++
>   lib/gpu_cmds.c              |  4 ++--
>   lib/gpu_cmds.h              |  2 +-
>   lib/iga64_generated_codes.c | 27 ++++++++++++++++++++++++++-
>   lib/xehp_media.h            |  4 +++-
>   7 files changed, 68 insertions(+), 9 deletions(-)
> 
> diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
> index fe0b8b35d..f83eee5f2 100644
> --- a/lib/gpgpu_fill.c
> +++ b/lib/gpgpu_fill.c
> @@ -359,7 +359,7 @@ void xehp_gpgpu_fillfunc(int i915,
>   	intel_bb_out(ibb, GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
>   		  PIPELINE_SELECT_GPGPU);
>   	xehp_emit_state_base_address(ibb);
> -	xehp_emit_state_compute_mode(ibb);
> +	xehp_emit_state_compute_mode(ibb, false);
>   	xehp_emit_state_binding_table_pool_alloc(ibb);
>   	xehp_emit_cfe_state(ibb, THREADS);
>   	xehp_emit_compute_walk(ibb, x, y, width, height, &idd, color);
> diff --git a/lib/gpgpu_shader.c b/lib/gpgpu_shader.c
> index c591eb119..a63af0d23 100644
> --- a/lib/gpgpu_shader.c
> +++ b/lib/gpgpu_shader.c
> @@ -179,6 +179,9 @@ __xehp_gpgpu_execfunc(struct intel_bb *ibb,
>   				       4 * shdr->size, &idd);
>   	idd.desc2.illegal_opcode_exception_enable = shdr->illegal_opcode_exception_enable;
>   
> +	if (shdr->vrt != VRT_DISABLED)
> +		idd.desc2.registers_per_thread = shdr->vrt;
> +
>   	if (sip && sip->size)
>   		sip_offset = fill_sip(ibb, sip->instr, 4 * sip->size);
>   	else
> @@ -190,7 +193,7 @@ __xehp_gpgpu_execfunc(struct intel_bb *ibb,
>   	intel_bb_out(ibb, GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
>   		     PIPELINE_SELECT_GPGPU);
>   	xehp_emit_state_base_address(ibb);
> -	xehp_emit_state_compute_mode(ibb);
> +	xehp_emit_state_compute_mode(ibb, shdr->vrt != VRT_DISABLED);
>   	xehp_emit_state_binding_table_pool_alloc(ibb);
>   	xehp_emit_cfe_state(ibb, THREADS);
>   
> @@ -276,7 +279,9 @@ struct gpgpu_shader *gpgpu_shader_create(int fd)
>   	shdr->max_size = 16 * 4;
>   	shdr->code = malloc(4 * shdr->max_size);
>   	shdr->labels = igt_map_create(igt_map_hash_32, igt_map_equal_32);
> +	shdr->vrt = VRT_DISABLED;
>   	igt_assert(shdr->code);
> +
>   	return shdr;
>   }
>   
> @@ -312,6 +317,19 @@ void gpgpu_shader_dump(struct gpgpu_shader *shdr)
>   			 shdr->instr[i][2], shdr->instr[i][3]);
>   }
>   
> +/**
> + * gpgpu_shader_set_vrt:
> + * @shdr: shader to be modified
> + * @vrt: one of accepted VRT modes
> + *
> + * Sets variable register per thread mode for given shader.
> + */
> +void gpgpu_shader_set_vrt(struct gpgpu_shader *shdr, enum gpgpu_shader_vrt_modes vrt)
> +{
> +	igt_assert(vrt == VRT_DISABLED || shdr->gen_ver >= 3000);
> +	shdr->vrt = vrt;
> +}
> +
>   /**
>    * gpgpu_shader__breakpoint_on:
>    * @shdr: shader to create breakpoint in
> @@ -371,14 +389,20 @@ void gpgpu_shader__nop(struct gpgpu_shader *shdr)
>    */
>   void gpgpu_shader__eot(struct gpgpu_shader *shdr)
>   {
> -	emit_iga64_code(shdr, eot, "						\n\
> +	if (shdr->vrt == VRT_96)
> +		emit_iga64_code(shdr, eot_vrt, "				\n\
> +(W)	mov (8|M0)               r80.0<1>:ud  r0.0<8;8,1>:ud			\n\
> +(W)	send.gtwy (8|M0)         null r80 src1_null     0 0x02000000 {EOT}	\n\
> +		");
> +	else
> +		emit_iga64_code(shdr, eot, "					\n\
>   (W)	mov (8|M0)               r112.0<1>:ud  r0.0<8;8,1>:ud			\n\
>   #if GEN_VER < 1250								\n\
>   (W)	send.ts (16|M0)          null r112 null 0x10000000 0x02000010 {EOT, at 1}	\n\
>   #else										\n\
>   (W)	send.gtwy (8|M0)         null r112 src1_null     0 0x02000000 {EOT}	\n\
>   #endif										\n\
> -	");
> +		");
>   }
>   
>   /**
> diff --git a/lib/gpgpu_shader.h b/lib/gpgpu_shader.h
> index 2ad6a7010..ca996d574 100644
> --- a/lib/gpgpu_shader.h
> +++ b/lib/gpgpu_shader.h
> @@ -13,6 +13,11 @@
>   struct intel_bb;
>   struct intel_buf;
>   
> +enum gpgpu_shader_vrt_modes {
> +	VRT_96 = 0x2,
> +	VRT_DISABLED,
> +};
> +
>   struct gpgpu_shader {
>   	uint32_t gen_ver;
>   	uint32_t size;
> @@ -23,6 +28,7 @@ struct gpgpu_shader {
>   	};
>   	struct igt_map *labels;
>   	bool illegal_opcode_exception_enable;
> +	enum gpgpu_shader_vrt_modes vrt;
>   };
>   
>   struct iga64_template {
> @@ -63,6 +69,8 @@ static inline uint32_t gpgpu_shader_last_instr(struct gpgpu_shader *shdr)
>   	return shdr->size / 4 - 1;
>   }
>   
> +void gpgpu_shader_set_vrt(struct gpgpu_shader *shdr, enum gpgpu_shader_vrt_modes vrt);
> +
>   void gpgpu_shader__wait(struct gpgpu_shader *shdr);
>   void gpgpu_shader__breakpoint_on(struct gpgpu_shader *shdr, uint32_t cmd_no);
>   void gpgpu_shader__breakpoint(struct gpgpu_shader *shdr);
> diff --git a/lib/gpu_cmds.c b/lib/gpu_cmds.c
> index f6a9bd09f..a6a9247dc 100644
> --- a/lib/gpu_cmds.c
> +++ b/lib/gpu_cmds.c
> @@ -1008,13 +1008,13 @@ xehp_emit_cfe_state(struct intel_bb *ibb, uint32_t threads)
>   }
>   
>   void
> -xehp_emit_state_compute_mode(struct intel_bb *ibb)
> +xehp_emit_state_compute_mode(struct intel_bb *ibb, bool vrt)
>   {
>   
>   	uint32_t dword_length = intel_graphics_ver(ibb->devid) >= IP_VER(20, 0);
>   
>   	intel_bb_out(ibb, XEHP_STATE_COMPUTE_MODE | dword_length);
> -	intel_bb_out(ibb, 0);
> +	intel_bb_out(ibb, vrt ? (0x10001) << 10 : 0); /* Enable variable number of threads */
>   
>   	if (dword_length)
>   		intel_bb_out(ibb, 0);
> diff --git a/lib/gpu_cmds.h b/lib/gpu_cmds.h
> index 1b9156a80..846d2122a 100644
> --- a/lib/gpu_cmds.h
> +++ b/lib/gpu_cmds.h
> @@ -124,7 +124,7 @@ xehp_fill_interface_descriptor(struct intel_bb *ibb,
>   			       struct xehp_interface_descriptor_data *idd);
>   
>   void
> -xehp_emit_state_compute_mode(struct intel_bb *ibb);
> +xehp_emit_state_compute_mode(struct intel_bb *ibb, bool vrt);
>   
>   void
>   xehp_emit_state_binding_table_pool_alloc(struct intel_bb *ibb);
> diff --git a/lib/iga64_generated_codes.c b/lib/iga64_generated_codes.c
> index e1f68c968..a74a8864e 100644
> --- a/lib/iga64_generated_codes.c
> +++ b/lib/iga64_generated_codes.c
> @@ -3,7 +3,7 @@
>   
>   #include "gpgpu_shader.h"
>   
> -#define MD5_SUM_IGA64_ASMS f0c9d803408104207f0427e387a8050c
> +#define MD5_SUM_IGA64_ASMS 80bb609ce27131259d19629dc74e349f
>   
>   struct iga64_template const iga64_code_gpgpu_fill[] = {
>   	{ .gen_ver = 2000, .size = 44, .code = (const uint32_t []) {
> @@ -747,6 +747,31 @@ struct iga64_template const iga64_code_eot[] = {
>   	}}
>   };
>   
> +struct iga64_template const iga64_code_eot_vrt[] = {
> +	{ .gen_ver = 2000, .size = 8, .code = (const uint32_t []) {
> +		0x800c0061, 0x50050220, 0x00460005, 0x00000000,
> +		0x800f2031, 0x00000004, 0x3000500c, 0x00000000,
> +	}},
> +	{ .gen_ver = 1270, .size = 12, .code = (const uint32_t []) {
> +		0x80030061, 0x50050220, 0x00460005, 0x00000000,
> +		0x80001901, 0x00010000, 0x00000000, 0x00000000,
> +		0x80034031, 0x00000004, 0x3000500c, 0x00000000,
> +	}},
> +	{ .gen_ver = 1260, .size = 8, .code = (const uint32_t []) {
> +		0x800c0061, 0x50050220, 0x00460005, 0x00000000,
> +		0x800f2031, 0x00000004, 0x3000500c, 0x00000000,
> +	}},
> +	{ .gen_ver = 1250, .size = 12, .code = (const uint32_t []) {
> +		0x80030061, 0x50050220, 0x00460005, 0x00000000,
> +		0x80001901, 0x00010000, 0x00000000, 0x00000000,
> +		0x80034031, 0x00000004, 0x3000500c, 0x00000000,
> +	}},
> +	{ .gen_ver = 0, .size = 8, .code = (const uint32_t []) {
> +		0x80030061, 0x50050220, 0x00460005, 0x00000000,
> +		0x80039031, 0x00000004, 0x3000500c, 0x00000000,
> +	}}
> +};
> +
>   struct iga64_template const iga64_code_nop[] = {
>   	{ .gen_ver = 1250, .size = 8, .code = (const uint32_t []) {
>   		0x00000060, 0x00000000, 0x00000000, 0x00000000,
> diff --git a/lib/xehp_media.h b/lib/xehp_media.h
> index c08288b46..fb65b8f20 100644
> --- a/lib/xehp_media.h
> +++ b/lib/xehp_media.h
> @@ -45,7 +45,9 @@ struct xehp_interface_descriptor_data {
>   		uint32_t single_program_flow: BITRANGE(18, 18);
>   		uint32_t denorm_mode: BITRANGE(19, 19);
>   		uint32_t thread_preemption_disable: BITRANGE(20, 20);
> -		uint32_t pad5: BITRANGE(21, 31);
> +		uint32_t pad5: BITRANGE(21, 25);
> +		uint32_t registers_per_thread: BITRANGE(26, 30);
> +		uint32_t pad6: BITRANGE(31, 31);
>   	} desc2;
>   
>   	struct {


More information about the igt-dev mailing list