[PATCH i-g-t 1/2] lib: Add variable registers per thread (VRT) setup

Dominik Grzegorzek dominik.grzegorzek at intel.com
Wed Feb 26 10:44:35 UTC 2025


For EU debug testing purposes we need to utilize all possible threads per
EU. This is possible only if we limit the number of GRFs per single thread.
Add a gpgpu_shader interface which allows us to set that up during pipeline
creation. For now, define only one mode, with 96 GRFs.

Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
---
 lib/gpgpu_fill.c            |  2 +-
 lib/gpgpu_shader.c          | 30 +++++++++++++++++++++++++++---
 lib/gpgpu_shader.h          |  8 ++++++++
 lib/gpu_cmds.c              |  4 ++--
 lib/gpu_cmds.h              |  2 +-
 lib/iga64_generated_codes.c | 27 ++++++++++++++++++++++++++-
 lib/xehp_media.h            |  4 +++-
 7 files changed, 68 insertions(+), 9 deletions(-)

diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index fe0b8b35d..f83eee5f2 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -359,7 +359,7 @@ void xehp_gpgpu_fillfunc(int i915,
 	intel_bb_out(ibb, GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
 		  PIPELINE_SELECT_GPGPU);
 	xehp_emit_state_base_address(ibb);
-	xehp_emit_state_compute_mode(ibb);
+	xehp_emit_state_compute_mode(ibb, false);
 	xehp_emit_state_binding_table_pool_alloc(ibb);
 	xehp_emit_cfe_state(ibb, THREADS);
 	xehp_emit_compute_walk(ibb, x, y, width, height, &idd, color);
diff --git a/lib/gpgpu_shader.c b/lib/gpgpu_shader.c
index c591eb119..a63af0d23 100644
--- a/lib/gpgpu_shader.c
+++ b/lib/gpgpu_shader.c
@@ -179,6 +179,9 @@ __xehp_gpgpu_execfunc(struct intel_bb *ibb,
 				       4 * shdr->size, &idd);
 	idd.desc2.illegal_opcode_exception_enable = shdr->illegal_opcode_exception_enable;
 
+	if (shdr->vrt != VRT_DISABLED)
+		idd.desc2.registers_per_thread = shdr->vrt;
+
 	if (sip && sip->size)
 		sip_offset = fill_sip(ibb, sip->instr, 4 * sip->size);
 	else
@@ -190,7 +193,7 @@ __xehp_gpgpu_execfunc(struct intel_bb *ibb,
 	intel_bb_out(ibb, GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
 		     PIPELINE_SELECT_GPGPU);
 	xehp_emit_state_base_address(ibb);
-	xehp_emit_state_compute_mode(ibb);
+	xehp_emit_state_compute_mode(ibb, shdr->vrt != VRT_DISABLED);
 	xehp_emit_state_binding_table_pool_alloc(ibb);
 	xehp_emit_cfe_state(ibb, THREADS);
 
@@ -276,7 +279,9 @@ struct gpgpu_shader *gpgpu_shader_create(int fd)
 	shdr->max_size = 16 * 4;
 	shdr->code = malloc(4 * shdr->max_size);
 	shdr->labels = igt_map_create(igt_map_hash_32, igt_map_equal_32);
+	shdr->vrt = VRT_DISABLED;
 	igt_assert(shdr->code);
+
 	return shdr;
 }
 
@@ -312,6 +317,19 @@ void gpgpu_shader_dump(struct gpgpu_shader *shdr)
 			 shdr->instr[i][2], shdr->instr[i][3]);
 }
 
+/**
+ * gpgpu_shader_set_vrt:
+ * @shdr: shader to be modified
+ * @vrt: one of accepted VRT modes
+ *
+ * Sets the variable registers per thread (VRT) mode for the given shader.
+ */
+void gpgpu_shader_set_vrt(struct gpgpu_shader *shdr, enum gpgpu_shader_vrt_modes vrt)
+{
+	igt_assert(vrt == VRT_DISABLED || shdr->gen_ver >= 3000);
+	shdr->vrt = vrt;
+}
+
 /**
  * gpgpu_shader__breakpoint_on:
  * @shdr: shader to create breakpoint in
@@ -371,14 +389,20 @@ void gpgpu_shader__nop(struct gpgpu_shader *shdr)
  */
 void gpgpu_shader__eot(struct gpgpu_shader *shdr)
 {
-	emit_iga64_code(shdr, eot, "						\n\
+	if (shdr->vrt == VRT_96)
+		emit_iga64_code(shdr, eot_vrt, "				\n\
+(W)	mov (8|M0)               r80.0<1>:ud  r0.0<8;8,1>:ud			\n\
+(W)	send.gtwy (8|M0)         null r80 src1_null     0 0x02000000 {EOT}	\n\
+		");
+	else
+		emit_iga64_code(shdr, eot, "					\n\
 (W)	mov (8|M0)               r112.0<1>:ud  r0.0<8;8,1>:ud			\n\
 #if GEN_VER < 1250								\n\
 (W)	send.ts (16|M0)          null r112 null 0x10000000 0x02000010 {EOT, at 1}	\n\
 #else										\n\
 (W)	send.gtwy (8|M0)         null r112 src1_null     0 0x02000000 {EOT}	\n\
 #endif										\n\
-	");
+		");
 }
 
 /**
diff --git a/lib/gpgpu_shader.h b/lib/gpgpu_shader.h
index 2ad6a7010..ca996d574 100644
--- a/lib/gpgpu_shader.h
+++ b/lib/gpgpu_shader.h
@@ -13,6 +13,11 @@
 struct intel_bb;
 struct intel_buf;
 
+enum gpgpu_shader_vrt_modes {
+	VRT_96 = 0x2,
+	VRT_DISABLED,
+};
+
 struct gpgpu_shader {
 	uint32_t gen_ver;
 	uint32_t size;
@@ -23,6 +28,7 @@ struct gpgpu_shader {
 	};
 	struct igt_map *labels;
 	bool illegal_opcode_exception_enable;
+	enum gpgpu_shader_vrt_modes vrt;
 };
 
 struct iga64_template {
@@ -63,6 +69,8 @@ static inline uint32_t gpgpu_shader_last_instr(struct gpgpu_shader *shdr)
 	return shdr->size / 4 - 1;
 }
 
+void gpgpu_shader_set_vrt(struct gpgpu_shader *shdr, enum gpgpu_shader_vrt_modes vrt);
+
 void gpgpu_shader__wait(struct gpgpu_shader *shdr);
 void gpgpu_shader__breakpoint_on(struct gpgpu_shader *shdr, uint32_t cmd_no);
 void gpgpu_shader__breakpoint(struct gpgpu_shader *shdr);
diff --git a/lib/gpu_cmds.c b/lib/gpu_cmds.c
index f6a9bd09f..a6a9247dc 100644
--- a/lib/gpu_cmds.c
+++ b/lib/gpu_cmds.c
@@ -1008,13 +1008,13 @@ xehp_emit_cfe_state(struct intel_bb *ibb, uint32_t threads)
 }
 
 void
-xehp_emit_state_compute_mode(struct intel_bb *ibb)
+xehp_emit_state_compute_mode(struct intel_bb *ibb, bool vrt)
 {
 
 	uint32_t dword_length = intel_graphics_ver(ibb->devid) >= IP_VER(20, 0);
 
 	intel_bb_out(ibb, XEHP_STATE_COMPUTE_MODE | dword_length);
-	intel_bb_out(ibb, 0);
+	intel_bb_out(ibb, vrt ? (0x10001) << 10 : 0); /* Enable variable number of threads */
 
 	if (dword_length)
 		intel_bb_out(ibb, 0);
diff --git a/lib/gpu_cmds.h b/lib/gpu_cmds.h
index 1b9156a80..846d2122a 100644
--- a/lib/gpu_cmds.h
+++ b/lib/gpu_cmds.h
@@ -124,7 +124,7 @@ xehp_fill_interface_descriptor(struct intel_bb *ibb,
 			       struct xehp_interface_descriptor_data *idd);
 
 void
-xehp_emit_state_compute_mode(struct intel_bb *ibb);
+xehp_emit_state_compute_mode(struct intel_bb *ibb, bool vrt);
 
 void
 xehp_emit_state_binding_table_pool_alloc(struct intel_bb *ibb);
diff --git a/lib/iga64_generated_codes.c b/lib/iga64_generated_codes.c
index e1f68c968..a74a8864e 100644
--- a/lib/iga64_generated_codes.c
+++ b/lib/iga64_generated_codes.c
@@ -3,7 +3,7 @@
 
 #include "gpgpu_shader.h"
 
-#define MD5_SUM_IGA64_ASMS f0c9d803408104207f0427e387a8050c
+#define MD5_SUM_IGA64_ASMS 80bb609ce27131259d19629dc74e349f
 
 struct iga64_template const iga64_code_gpgpu_fill[] = {
 	{ .gen_ver = 2000, .size = 44, .code = (const uint32_t []) {
@@ -747,6 +747,31 @@ struct iga64_template const iga64_code_eot[] = {
 	}}
 };
 
+struct iga64_template const iga64_code_eot_vrt[] = {
+	{ .gen_ver = 2000, .size = 8, .code = (const uint32_t []) {
+		0x800c0061, 0x50050220, 0x00460005, 0x00000000,
+		0x800f2031, 0x00000004, 0x3000500c, 0x00000000,
+	}},
+	{ .gen_ver = 1270, .size = 12, .code = (const uint32_t []) {
+		0x80030061, 0x50050220, 0x00460005, 0x00000000,
+		0x80001901, 0x00010000, 0x00000000, 0x00000000,
+		0x80034031, 0x00000004, 0x3000500c, 0x00000000,
+	}},
+	{ .gen_ver = 1260, .size = 8, .code = (const uint32_t []) {
+		0x800c0061, 0x50050220, 0x00460005, 0x00000000,
+		0x800f2031, 0x00000004, 0x3000500c, 0x00000000,
+	}},
+	{ .gen_ver = 1250, .size = 12, .code = (const uint32_t []) {
+		0x80030061, 0x50050220, 0x00460005, 0x00000000,
+		0x80001901, 0x00010000, 0x00000000, 0x00000000,
+		0x80034031, 0x00000004, 0x3000500c, 0x00000000,
+	}},
+	{ .gen_ver = 0, .size = 8, .code = (const uint32_t []) {
+		0x80030061, 0x50050220, 0x00460005, 0x00000000,
+		0x80039031, 0x00000004, 0x3000500c, 0x00000000,
+	}}
+};
+
 struct iga64_template const iga64_code_nop[] = {
 	{ .gen_ver = 1250, .size = 8, .code = (const uint32_t []) {
 		0x00000060, 0x00000000, 0x00000000, 0x00000000,
diff --git a/lib/xehp_media.h b/lib/xehp_media.h
index c08288b46..fb65b8f20 100644
--- a/lib/xehp_media.h
+++ b/lib/xehp_media.h
@@ -45,7 +45,9 @@ struct xehp_interface_descriptor_data {
 		uint32_t single_program_flow: BITRANGE(18, 18);
 		uint32_t denorm_mode: BITRANGE(19, 19);
 		uint32_t thread_preemption_disable: BITRANGE(20, 20);
-		uint32_t pad5: BITRANGE(21, 31);
+		uint32_t pad5: BITRANGE(21, 25);
+		uint32_t registers_per_thread: BITRANGE(26, 30);
+		uint32_t pad6: BITRANGE(31, 31);
 	} desc2;
 
 	struct {
-- 
2.34.1



More information about the igt-dev mailing list