Mesa (master): radv/aco,aco: set lower_fmod

Fri Oct 4 14:01:20 UTC 2019

Module: Mesa
Branch: master
Commit: a87b0f51410932e03f43a0f5c8ad21a9099e57df
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a87b0f51410932e03f43a0f5c8ad21a9099e57df

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Thu Oct  3 15:32:19 2019 +0100

radv/aco,aco: set lower_fmod

This simplifies ACO and allows the lowered code to be optimized (in
particular, constant folded).

Totals from affected shaders:
SGPRS: 1776 -> 1776 (0.00 %)
VGPRS: 1436 -> 1436 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 203452 -> 203564 (0.06 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 103 -> 103 (0.00 %)

At least some of the code size increase seems to be from literals being
applied to instructions as a result of constant folding.

v2: remove fmod/frem handling in init_context()

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>

---

 src/amd/compiler/aco_instruction_selection.cpp     | 29 ----------------------
 .../compiler/aco_instruction_selection_setup.cpp   |  2 --
 src/amd/vulkan/radv_shader.c                       |  1 +
 3 files changed, 1 insertion(+), 31 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index b08a4008330..70d1258a20c 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -1225,35 +1225,6 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
       }
       break;
    }
-   case nir_op_fmod:
-   case nir_op_frem: {
-      if (dst.size() == 1) {
-         Temp rcp = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), get_alu_src(ctx, instr->src[1]));
-         Temp mul = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), get_alu_src(ctx, instr->src[0]), rcp);
-
-         aco_opcode op = instr->op == nir_op_fmod ? aco_opcode::v_floor_f32 : aco_opcode::v_trunc_f32;
-         Temp floor = bld.vop1(op, bld.def(v1), mul);
-
-         mul = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), get_alu_src(ctx, instr->src[1]), floor);
-         bld.vop2(aco_opcode::v_sub_f32, Definition(dst), get_alu_src(ctx, instr->src[0]), mul);
-      } else if (dst.size() == 2) {
-         Temp rcp = bld.vop1(aco_opcode::v_rcp_f64, bld.def(v2), get_alu_src(ctx, instr->src[1]));
-         Temp mul = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), get_alu_src(ctx, instr->src[0]), rcp);
-
-         aco_opcode op = instr->op == nir_op_fmod ? aco_opcode::v_floor_f64 : aco_opcode::v_trunc_f64;
-         Temp floor = bld.vop1(op, bld.def(v1), mul);
-
-         mul = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), get_alu_src(ctx, instr->src[1]), floor);
-         Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), get_alu_src(ctx, instr->src[0]), mul);
-         VOP3A_instruction* sub = static_cast<VOP3A_instruction*>(add);
-         sub->neg[1] = true;
-      } else {
-         fprintf(stderr, "Unimplemented NIR instr bit size: ");
-         nir_print_instr(&instr->instr, stderr);
-         fprintf(stderr, "\n");
-      }
-      break;
-   }
    case nir_op_fmax: {
       if (dst.size() == 1) {
          emit_vop2_instruction(ctx, instr, aco_opcode::v_max_f32, dst, true);
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index a32ada0613e..2d06ac87b9d 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -201,8 +201,6 @@ void init_context(isel_context *ctx, nir_shader *shader)
                   case nir_op_fmax3:
                   case nir_op_fmin3:
                   case nir_op_fmed3:
-                  case nir_op_fmod:
-                  case nir_op_frem:
                   case nir_op_fneg:
                   case nir_op_fabs:
                   case nir_op_fsat:
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 09c5ce639c8..ee37c06b8ca 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -91,6 +91,7 @@ static const struct nir_shader_compiler_options nir_options_aco = {
 	.lower_flrp64 = true,
 	.lower_device_index_to_zero = true,
 	.lower_fdiv = true,
+	.lower_fmod = true,
 	.lower_bitfield_insert_to_bitfield_select = true,
 	.lower_bitfield_extract = true,
 	.lower_pack_snorm_2x16 = true,