Mesa (main): glsl,nir: Move i/umulExtended lowering to NIR.

Wed Jun 1 11:38:13 UTC 2022

Module: Mesa
Branch: main
Commit: 7472bb4badbf3bf196c1c09adbe16e70b048ab2d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7472bb4badbf3bf196c1c09adbe16e70b048ab2d

Author: Emma Anholt <emma at anholt.net>
Date:   Tue May 10 10:31:07 2022 -0700

glsl,nir: Move i/umulExtended lowering to NIR.

NIR already has the necessary lowering, and the GLSL lowering violates
GLSL IR validation rules.  Once quadop lowering was turned off, the IR
validation at the end of the compile path on DEBUG builds caught the
problem.

In order to move the lowering to NIR, though, we need to make sure that
drivers supporting these functions actually have the lowering flag
(nir_lower_imul_2x32_64 in lower_int64_options) set.

Expected failures (xfails) are added for panfrost t860, where this change
apparently exposes a variety of existing 64-bit bugs in the backend.

Fixes: #6461
Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig at collabora.com>
Reviewed-by: Mykhailo Skorokhodov <mykhailo.skorokhodov at globallogic.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16437>

---

 src/compiler/glsl/ir_optimization.h                |  1 -
 src/compiler/glsl/lower_instructions.cpp           | 70 ----------------------
 src/gallium/auxiliary/nir/nir_to_tgsi.c            |  1 +
 src/gallium/drivers/llvmpipe/lp_screen.c           |  1 +
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   |  2 +-
 src/gallium/drivers/radeonsi/si_get.c              |  1 +
 src/gallium/drivers/softpipe/sp_screen.c           |  1 +
 src/gallium/drivers/svga/svga_screen.c             |  1 +
 src/gallium/drivers/v3d/v3d_screen.c               |  1 +
 src/mesa/state_tracker/st_glsl_to_ir.cpp           |  1 -
 src/panfrost/ci/panfrost-t860-fails.txt            |  5 ++
 src/panfrost/midgard/midgard_compile.h             |  3 +
 12 files changed, 15 insertions(+), 73 deletions(-)

diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index 7fc8e036431..42b5be5d79e 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -55,7 +55,6 @@ struct gl_shader_program;
 #define DDIV_TO_MUL_RCP           0x100000
 #define DIV_TO_MUL_RCP            (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
 #define SQRT_TO_ABS_SQRT          0x200000
-#define MUL64_TO_MUL_AND_MUL_HIGH 0x400000
 
 /* Operations for lower_64bit_integer_instructions() */
 #define DIV64                     (1U << 0)
diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp
index 8e6ed3c368c..f8527625cea 100644
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -157,7 +157,6 @@ private:
    void find_msb_to_float_cast(ir_expression *ir);
    void imul_high_to_mul(ir_expression *ir);
    void sqrt_to_abs_sqrt(ir_expression *ir);
-   void mul64_to_mul_and_mul_high(ir_expression *ir);
 
    ir_expression *_carry(operand a, operand b);
 
@@ -1615,66 +1614,6 @@ lower_instructions_visitor::sqrt_to_abs_sqrt(ir_expression *ir)
    this->progress = true;
 }
 
-void
-lower_instructions_visitor::mul64_to_mul_and_mul_high(ir_expression *ir)
-{
-   /* Lower 32x32-> 64 to
-    *    msb = imul_high(x_lo, y_lo)
-    *    lsb = mul(x_lo, y_lo)
-    */
-   const unsigned elements = ir->operands[0]->type->vector_elements;
-
-   const ir_expression_operation operation =
-      ir->type->base_type == GLSL_TYPE_UINT64 ? ir_unop_pack_uint_2x32
-                                              : ir_unop_pack_int_2x32;
-
-   const glsl_type *var_type = ir->type->base_type == GLSL_TYPE_UINT64
-                               ? glsl_type::uvec(elements)
-                               : glsl_type::ivec(elements);
-
-   const glsl_type *ret_type = ir->type->base_type == GLSL_TYPE_UINT64
-                               ? glsl_type::uvec2_type
-                               : glsl_type::ivec2_type;
-
-   ir_instruction &i = *base_ir;
-
-   ir_variable *msb =
-      new(ir) ir_variable(var_type, "msb", ir_var_temporary);
-   ir_variable *lsb =
-      new(ir) ir_variable(var_type, "lsb", ir_var_temporary);
-   ir_variable *x =
-      new(ir) ir_variable(var_type, "x", ir_var_temporary);
-   ir_variable *y =
-      new(ir) ir_variable(var_type, "y", ir_var_temporary);
-
-   i.insert_before(x);
-   i.insert_before(assign(x, ir->operands[0]));
-   i.insert_before(y);
-   i.insert_before(assign(y, ir->operands[1]));
-   i.insert_before(msb);
-   i.insert_before(lsb);
-
-   i.insert_before(assign(msb, imul_high(x, y)));
-   i.insert_before(assign(lsb, mul(x, y)));
-
-   ir_rvalue *result[4] = {NULL};
-   for (unsigned elem = 0; elem < elements; elem++) {
-      ir_rvalue *val = new(ir) ir_expression(ir_quadop_vector, ret_type,
-                                             swizzle(lsb, elem, 1),
-                                             swizzle(msb, elem, 1), NULL, NULL);
-      result[elem] = expr(operation, val);
-   }
-
-   ir->operation = ir_quadop_vector;
-   ir->init_num_operands();
-   ir->operands[0] = result[0];
-   ir->operands[1] = result[1];
-   ir->operands[2] = result[2];
-   ir->operands[3] = result[3];
-
-   this->progress = true;
-}
-
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -1802,15 +1741,6 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
          imul_high_to_mul(ir);
       break;
 
-   case ir_binop_mul:
-      if (lowering(MUL64_TO_MUL_AND_MUL_HIGH) &&
-          (ir->type->base_type == GLSL_TYPE_INT64 ||
-           ir->type->base_type == GLSL_TYPE_UINT64) &&
-          (ir->operands[0]->type->base_type == GLSL_TYPE_INT ||
-           ir->operands[1]->type->base_type == GLSL_TYPE_UINT))
-         mul64_to_mul_and_mul_high(ir);
-      break;
-
    case ir_unop_rsq:
    case ir_unop_sqrt:
       if (lowering(SQRT_TO_ABS_SQRT))
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index abb9cb8d2de..2993fc27e22 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -3906,6 +3906,7 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
    .lower_rotate = true,
    .lower_uniforms_to_ubo = true,
    .lower_vector_cmp = true,
+   .lower_int64_options = nir_lower_imul_2x32_64,
    .use_interpolated_input_intrinsics = true,
 };
 
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 94eda66c66c..f653f1e26fb 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -619,6 +619,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
    .lower_usub_borrow = true,
    .lower_mul_2x32_64 = true,
    .lower_ifind_msb = true,
+   .lower_int64_options = nir_lower_imul_2x32_64,
    .max_unroll_iterations = 32,
    .use_interpolated_input_intrinsics = true,
    .lower_to_scalar = true,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index ebc5c6f87c0..931ee85645c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -3393,7 +3393,7 @@ nvir_nir_shader_compiler_options(int chipset)
       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) |
       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) |
       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) |
-      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) |
+      nir_lower_imul_2x32_64 |
       ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) |
       nir_lower_ufind_msb64
    );
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 84d6e67bdbf..06ea8c5e535 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -1081,6 +1081,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
       .lower_insert_word = true,
       .lower_rotate = true,
       .lower_to_scalar = true,
+      .lower_int64_options = nir_lower_imul_2x32_64,
       .has_sdot_4x8 = sscreen->info.has_accelerated_dot_product,
       .has_udot_4x8 = sscreen->info.has_accelerated_dot_product,
       .has_dot_2x16 = sscreen->info.has_accelerated_dot_product,
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 7d84088e528..e59ec44cef0 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -88,6 +88,7 @@ static const nir_shader_compiler_options sp_compiler_options = {
    .lower_rotate = true,
    .lower_uniforms_to_ubo = true,
    .lower_vector_cmp = true,
+   .lower_int64_options = nir_lower_imul_2x32_64,
    .max_unroll_iterations = 32,
    .use_interpolated_input_intrinsics = true,
 };
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 58b96ec575a..5e880020666 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -762,6 +762,7 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
    .lower_extract_word = true,                                                \
    .lower_insert_byte = true,                                                 \
    .lower_insert_word = true,                                                 \
+   .lower_int64_options = nir_lower_imul_2x32_64,                             \
    .lower_fdph = true,                                                        \
    .lower_flrp64 = true,                                                      \
    .lower_rotate = true,                                                      \
diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c
index 7dc1b33a465..b0dca2afcb0 100644
--- a/src/gallium/drivers/v3d/v3d_screen.c
+++ b/src/gallium/drivers/v3d/v3d_screen.c
@@ -729,6 +729,7 @@ static const nir_shader_compiler_options v3d_nir_options = {
         .lower_wpos_pntc = true,
         .lower_rotate = true,
         .lower_to_scalar = true,
+        .lower_int64_options = nir_lower_imul_2x32_64,
         .has_fsub = true,
         .has_isub = true,
         .divergence_analysis_options =
diff --git a/src/mesa/state_tracker/st_glsl_to_ir.cpp b/src/mesa/state_tracker/st_glsl_to_ir.cpp
index 769226065dc..d8333bb44af 100644
--- a/src/mesa/state_tracker/st_glsl_to_ir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_ir.cpp
@@ -106,7 +106,6 @@ link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
                          FDIV_TO_MUL_RCP |
                          EXP_TO_EXP2 |
                          LOG_TO_LOG2 |
-                         MUL64_TO_MUL_AND_MUL_HIGH |
                          (have_ldexp ? 0 : LDEXP_TO_ARITH) |
                          (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
                          CARRY_TO_ARITH |
diff --git a/src/panfrost/ci/panfrost-t860-fails.txt b/src/panfrost/ci/panfrost-t860-fails.txt
index f679b2d32d1..c05cbd19824 100644
--- a/src/panfrost/ci/panfrost-t860-fails.txt
+++ b/src/panfrost/ci/panfrost-t860-fails.txt
@@ -87,3 +87,8 @@ dEQP-GLES31.functional.texture.multisample.samples_3.use_texture_uint_2d,Fail
 dEQP-GLES31.functional.texture.multisample.samples_3.use_texture_uint_2d_array,Fail
 dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_int_2d,Fail
 dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_int_2d_array,Fail
+
+dEQP-GLES31.functional.shaders.builtin_functions.integer.imulextended.ivec3_highp_compute,Fail
+dEQP-GLES31.functional.shaders.builtin_functions.integer.umulextended.uvec3_highp_compute,Fail
+dEQP-GLES31.functional.shaders.builtin_functions.integer.imulextended.ivec4_highp_vertex,Fail
+dEQP-GLES31.functional.shaders.builtin_functions.integer.umulextended.uvec4_highp_vertex,Fail
diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h
index 3c5193f63fe..5ab520a37eb 100644
--- a/src/panfrost/midgard/midgard_compile.h
+++ b/src/panfrost/midgard/midgard_compile.h
@@ -83,6 +83,9 @@ static const nir_shader_compiler_options midgard_nir_options = {
         .lower_unpack_unorm_4x8 = true,
         .lower_unpack_snorm_4x8 = true,
         .lower_pack_split = true,
+        .lower_pack_64_2x32_split = true,
+        .lower_unpack_64_2x32_split = true,
+        .lower_int64_options = nir_lower_imul_2x32_64,
 
         .lower_doubles_options = nir_lower_dmod,