Mesa (main): radv,aco,ac/llvm: implement fmulz and ffmaz

Thu Jan 20 23:23:39 UTC 2022

Module: Mesa
Branch: main
Commit: e7f91b194aec240ceadc1a0cdb2e20ba821d37c2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e7f91b194aec240ceadc1a0cdb2e20ba821d37c2

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Tue Apr 27 12:28:27 2021 +0100

radv,aco,ac/llvm: implement fmulz and ffmaz

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13436>

---

 src/amd/compiler/aco_instruction_selection.cpp       | 17 +++++++++++++++++
 src/amd/compiler/aco_instruction_selection_setup.cpp |  2 ++
 src/amd/llvm/ac_nir_to_llvm.c                        | 15 +++++++++++++++
 src/amd/vulkan/radv_shader.c                         |  5 +++++
 4 files changed, 39 insertions(+)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 6c378111ff4..d8af3d9e76b 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2069,6 +2069,14 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
       }
       break;
    }
+   case nir_op_fmulz: {
+      if (dst.regClass() == v1) {
+         emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_legacy_f32, dst, true);
+      } else {
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
+      }
+      break;
+   }
    case nir_op_fadd: {
       if (dst.regClass() == v2b) {
          emit_vop2_instruction(ctx, instr, aco_opcode::v_add_f16, dst, true);
@@ -2141,6 +2149,15 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
       }
       break;
    }
+   case nir_op_ffmaz: {
+      if (dst.regClass() == v1) {
+         emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_legacy_f32, dst,
+                                ctx->block->fp_mode.must_flush_denorms32, 3);
+      } else {
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
+      }
+      break;
+   }
    case nir_op_fmax: {
       if (dst.regClass() == v2b) {
          // TODO: check fp_mode.must_flush_denorms16_64
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 3f4396c1a99..7cfd616b135 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -480,9 +480,11 @@ init_context(isel_context* ctx, nir_shader* shader)
                   nir_dest_is_divergent(alu_instr->dest.dest) ? RegType::vgpr : RegType::sgpr;
                switch (alu_instr->op) {
                case nir_op_fmul:
+               case nir_op_fmulz:
                case nir_op_fadd:
                case nir_op_fsub:
                case nir_op_ffma:
+               case nir_op_ffmaz:
                case nir_op_fmax:
                case nir_op_fmin:
                case nir_op_fneg:
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index e038e49f094..53a641e848d 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -688,6 +688,13 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
       src[1] = ac_to_float(&ctx->ac, src[1]);
       result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
       break;
+   case nir_op_fmulz:
+      assert(LLVM_VERSION_MAJOR >= 12);
+      src[0] = ac_to_float(&ctx->ac, src[0]);
+      src[1] = ac_to_float(&ctx->ac, src[1]);
+      result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fmul.legacy", ctx->ac.f32,
+                                  src, 2, AC_FUNC_ATTR_READNONE);
+      break;
    case nir_op_frcp:
       /* For doubles, we need precise division to pass GLCTS. */
       if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && ac_get_type_size(def_type) == 8) {
@@ -906,6 +913,14 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
       result = emit_intrin_3f_param(&ctx->ac, "llvm.fma", ac_to_float_type(&ctx->ac, def_type),
                                     src[0], src[1], src[2]);
       break;
+   case nir_op_ffmaz:
+      assert(LLVM_VERSION_MAJOR >= 12 && ctx->ac.chip_class >= GFX10_3);
+      src[0] = ac_to_float(&ctx->ac, src[0]);
+      src[1] = ac_to_float(&ctx->ac, src[1]);
+      src[2] = ac_to_float(&ctx->ac, src[2]);
+      result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fma.legacy", ctx->ac.f32,
+                                  src, 3, AC_FUNC_ATTR_READNONE);
+      break;
    case nir_op_ldexp:
       src[0] = ac_to_float(&ctx->ac, src[0]);
       if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index d7c59c77bf1..de9bc7b01d0 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -92,6 +92,11 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s
       .has_udot_4x8 = device->rad_info.has_accelerated_dot_product,
       .has_dot_2x16 = device->rad_info.has_accelerated_dot_product,
       .use_scoped_barrier = true,
+#ifdef LLVM_AVAILABLE
+      .has_fmulz = !device->use_llvm || LLVM_VERSION_MAJOR >= 12,
+#else
+      .has_fmulz = true,
+#endif
       .max_unroll_iterations = 32,
       .max_unroll_iterations_aggressive = 128,
       .use_interpolated_input_intrinsics = true,