[PATCH i-g-t 1/2] lib: Add variable registers per thread (VRT) setup
Manszewski, Christoph
christoph.manszewski at intel.com
Tue Mar 4 11:36:31 UTC 2025
Hi Dominik,
On 26.02.2025 11:44, Dominik Grzegorzek wrote:
> For EU debug testing purposes we need to utilize all possible threads per
> EU. This is possible only if we limit the number of GRFs per single thread.
> Add a gpgpu_shader interface which allows us to set that up during pipeline
> creation. For now define only one mode, with 96 GRFs.
>
> Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
Reviewed-by: Christoph Manszewski <christoph.manszewski at intel.com>
> ---
> lib/gpgpu_fill.c | 2 +-
> lib/gpgpu_shader.c | 30 +++++++++++++++++++++++++++---
> lib/gpgpu_shader.h | 8 ++++++++
> lib/gpu_cmds.c | 4 ++--
> lib/gpu_cmds.h | 2 +-
> lib/iga64_generated_codes.c | 27 ++++++++++++++++++++++++++-
> lib/xehp_media.h | 4 +++-
> 7 files changed, 68 insertions(+), 9 deletions(-)
>
> diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
> index fe0b8b35d..f83eee5f2 100644
> --- a/lib/gpgpu_fill.c
> +++ b/lib/gpgpu_fill.c
> @@ -359,7 +359,7 @@ void xehp_gpgpu_fillfunc(int i915,
> intel_bb_out(ibb, GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
> PIPELINE_SELECT_GPGPU);
> xehp_emit_state_base_address(ibb);
> - xehp_emit_state_compute_mode(ibb);
> + xehp_emit_state_compute_mode(ibb, false);
> xehp_emit_state_binding_table_pool_alloc(ibb);
> xehp_emit_cfe_state(ibb, THREADS);
> xehp_emit_compute_walk(ibb, x, y, width, height, &idd, color);
> diff --git a/lib/gpgpu_shader.c b/lib/gpgpu_shader.c
> index c591eb119..a63af0d23 100644
> --- a/lib/gpgpu_shader.c
> +++ b/lib/gpgpu_shader.c
> @@ -179,6 +179,9 @@ __xehp_gpgpu_execfunc(struct intel_bb *ibb,
> 4 * shdr->size, &idd);
> idd.desc2.illegal_opcode_exception_enable = shdr->illegal_opcode_exception_enable;
>
> + if (shdr->vrt != VRT_DISABLED)
> + idd.desc2.registers_per_thread = shdr->vrt;
> +
> if (sip && sip->size)
> sip_offset = fill_sip(ibb, sip->instr, 4 * sip->size);
> else
> @@ -190,7 +193,7 @@ __xehp_gpgpu_execfunc(struct intel_bb *ibb,
> intel_bb_out(ibb, GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
> PIPELINE_SELECT_GPGPU);
> xehp_emit_state_base_address(ibb);
> - xehp_emit_state_compute_mode(ibb);
> + xehp_emit_state_compute_mode(ibb, shdr->vrt != VRT_DISABLED);
> xehp_emit_state_binding_table_pool_alloc(ibb);
> xehp_emit_cfe_state(ibb, THREADS);
>
> @@ -276,7 +279,9 @@ struct gpgpu_shader *gpgpu_shader_create(int fd)
> shdr->max_size = 16 * 4;
> shdr->code = malloc(4 * shdr->max_size);
> shdr->labels = igt_map_create(igt_map_hash_32, igt_map_equal_32);
> + shdr->vrt = VRT_DISABLED;
> igt_assert(shdr->code);
> +
> return shdr;
> }
>
> @@ -312,6 +317,19 @@ void gpgpu_shader_dump(struct gpgpu_shader *shdr)
> shdr->instr[i][2], shdr->instr[i][3]);
> }
>
> +/**
> + * gpgpu_shader_set_vrt:
> + * @shdr: shader to be modified
> + * @vrt: one of accepted VRT modes
> + *
> + * Sets the variable registers per thread (VRT) mode for the given shader.
> + */
> +void gpgpu_shader_set_vrt(struct gpgpu_shader *shdr, enum gpgpu_shader_vrt_modes vrt)
> +{
> + igt_assert(vrt == VRT_DISABLED || shdr->gen_ver >= 3000);
> + shdr->vrt = vrt;
> +}
> +
> /**
> * gpgpu_shader__breakpoint_on:
> * @shdr: shader to create breakpoint in
> @@ -371,14 +389,20 @@ void gpgpu_shader__nop(struct gpgpu_shader *shdr)
> */
> void gpgpu_shader__eot(struct gpgpu_shader *shdr)
> {
> - emit_iga64_code(shdr, eot, " \n\
> + if (shdr->vrt == VRT_96)
> + emit_iga64_code(shdr, eot_vrt, " \n\
> +(W) mov (8|M0) r80.0<1>:ud r0.0<8;8,1>:ud \n\
> +(W) send.gtwy (8|M0) null r80 src1_null 0 0x02000000 {EOT} \n\
> + ");
> + else
> + emit_iga64_code(shdr, eot, " \n\
> (W) mov (8|M0) r112.0<1>:ud r0.0<8;8,1>:ud \n\
> #if GEN_VER < 1250 \n\
> (W) send.ts (16|M0) null r112 null 0x10000000 0x02000010 {EOT, at 1} \n\
> #else \n\
> (W) send.gtwy (8|M0) null r112 src1_null 0 0x02000000 {EOT} \n\
> #endif \n\
> - ");
> + ");
> }
>
> /**
> diff --git a/lib/gpgpu_shader.h b/lib/gpgpu_shader.h
> index 2ad6a7010..ca996d574 100644
> --- a/lib/gpgpu_shader.h
> +++ b/lib/gpgpu_shader.h
> @@ -13,6 +13,11 @@
> struct intel_bb;
> struct intel_buf;
>
> +enum gpgpu_shader_vrt_modes {
> + VRT_96 = 0x2,
> + VRT_DISABLED,
> +};
> +
> struct gpgpu_shader {
> uint32_t gen_ver;
> uint32_t size;
> @@ -23,6 +28,7 @@ struct gpgpu_shader {
> };
> struct igt_map *labels;
> bool illegal_opcode_exception_enable;
> + enum gpgpu_shader_vrt_modes vrt;
> };
>
> struct iga64_template {
> @@ -63,6 +69,8 @@ static inline uint32_t gpgpu_shader_last_instr(struct gpgpu_shader *shdr)
> return shdr->size / 4 - 1;
> }
>
> +void gpgpu_shader_set_vrt(struct gpgpu_shader *shdr, enum gpgpu_shader_vrt_modes vrt);
> +
> void gpgpu_shader__wait(struct gpgpu_shader *shdr);
> void gpgpu_shader__breakpoint_on(struct gpgpu_shader *shdr, uint32_t cmd_no);
> void gpgpu_shader__breakpoint(struct gpgpu_shader *shdr);
> diff --git a/lib/gpu_cmds.c b/lib/gpu_cmds.c
> index f6a9bd09f..a6a9247dc 100644
> --- a/lib/gpu_cmds.c
> +++ b/lib/gpu_cmds.c
> @@ -1008,13 +1008,13 @@ xehp_emit_cfe_state(struct intel_bb *ibb, uint32_t threads)
> }
>
> void
> -xehp_emit_state_compute_mode(struct intel_bb *ibb)
> +xehp_emit_state_compute_mode(struct intel_bb *ibb, bool vrt)
> {
>
> uint32_t dword_length = intel_graphics_ver(ibb->devid) >= IP_VER(20, 0);
>
> intel_bb_out(ibb, XEHP_STATE_COMPUTE_MODE | dword_length);
> - intel_bb_out(ibb, 0);
> + intel_bb_out(ibb, vrt ? (0x10001) << 10 : 0); /* Enable variable number of threads */
>
> if (dword_length)
> intel_bb_out(ibb, 0);
> diff --git a/lib/gpu_cmds.h b/lib/gpu_cmds.h
> index 1b9156a80..846d2122a 100644
> --- a/lib/gpu_cmds.h
> +++ b/lib/gpu_cmds.h
> @@ -124,7 +124,7 @@ xehp_fill_interface_descriptor(struct intel_bb *ibb,
> struct xehp_interface_descriptor_data *idd);
>
> void
> -xehp_emit_state_compute_mode(struct intel_bb *ibb);
> +xehp_emit_state_compute_mode(struct intel_bb *ibb, bool vrt);
>
> void
> xehp_emit_state_binding_table_pool_alloc(struct intel_bb *ibb);
> diff --git a/lib/iga64_generated_codes.c b/lib/iga64_generated_codes.c
> index e1f68c968..a74a8864e 100644
> --- a/lib/iga64_generated_codes.c
> +++ b/lib/iga64_generated_codes.c
> @@ -3,7 +3,7 @@
>
> #include "gpgpu_shader.h"
>
> -#define MD5_SUM_IGA64_ASMS f0c9d803408104207f0427e387a8050c
> +#define MD5_SUM_IGA64_ASMS 80bb609ce27131259d19629dc74e349f
>
> struct iga64_template const iga64_code_gpgpu_fill[] = {
> { .gen_ver = 2000, .size = 44, .code = (const uint32_t []) {
> @@ -747,6 +747,31 @@ struct iga64_template const iga64_code_eot[] = {
> }}
> };
>
> +struct iga64_template const iga64_code_eot_vrt[] = {
> + { .gen_ver = 2000, .size = 8, .code = (const uint32_t []) {
> + 0x800c0061, 0x50050220, 0x00460005, 0x00000000,
> + 0x800f2031, 0x00000004, 0x3000500c, 0x00000000,
> + }},
> + { .gen_ver = 1270, .size = 12, .code = (const uint32_t []) {
> + 0x80030061, 0x50050220, 0x00460005, 0x00000000,
> + 0x80001901, 0x00010000, 0x00000000, 0x00000000,
> + 0x80034031, 0x00000004, 0x3000500c, 0x00000000,
> + }},
> + { .gen_ver = 1260, .size = 8, .code = (const uint32_t []) {
> + 0x800c0061, 0x50050220, 0x00460005, 0x00000000,
> + 0x800f2031, 0x00000004, 0x3000500c, 0x00000000,
> + }},
> + { .gen_ver = 1250, .size = 12, .code = (const uint32_t []) {
> + 0x80030061, 0x50050220, 0x00460005, 0x00000000,
> + 0x80001901, 0x00010000, 0x00000000, 0x00000000,
> + 0x80034031, 0x00000004, 0x3000500c, 0x00000000,
> + }},
> + { .gen_ver = 0, .size = 8, .code = (const uint32_t []) {
> + 0x80030061, 0x50050220, 0x00460005, 0x00000000,
> + 0x80039031, 0x00000004, 0x3000500c, 0x00000000,
> + }}
> +};
> +
> struct iga64_template const iga64_code_nop[] = {
> { .gen_ver = 1250, .size = 8, .code = (const uint32_t []) {
> 0x00000060, 0x00000000, 0x00000000, 0x00000000,
> diff --git a/lib/xehp_media.h b/lib/xehp_media.h
> index c08288b46..fb65b8f20 100644
> --- a/lib/xehp_media.h
> +++ b/lib/xehp_media.h
> @@ -45,7 +45,9 @@ struct xehp_interface_descriptor_data {
> uint32_t single_program_flow: BITRANGE(18, 18);
> uint32_t denorm_mode: BITRANGE(19, 19);
> uint32_t thread_preemption_disable: BITRANGE(20, 20);
> - uint32_t pad5: BITRANGE(21, 31);
> + uint32_t pad5: BITRANGE(21, 25);
> + uint32_t registers_per_thread: BITRANGE(26, 30);
> + uint32_t pad6: BITRANGE(31, 31);
> } desc2;
>
> struct {
More information about the igt-dev
mailing list