Mesa (master): aco: implement 8/16-bit instructions which can be trivially widened

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Nov 4 12:08:03 UTC 2020


Module: Mesa
Branch: master
Commit: 786828131a7c72ae1f9a21159255464ac7f4ae8b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=786828131a7c72ae1f9a21159255464ac7f4ae8b

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Wed Jul  8 19:19:43 2020 +0100

aco: implement 8/16-bit instructions which can be trivially widened

When nir_lower_bit_size becomes more capable, we might want to revert some
of this.

fossil-db (parallel-rdp, Navi):
Totals from 217 (31.77% of 683) affected shaders:
SGPRs: 11320 -> 10200 (-9.89%)
VGPRs: 7156 -> 7364 (+2.91%)
CodeSize: 1453948 -> 1430136 (-1.64%); split: -1.66%, +0.02%
Instrs: 258530 -> 254840 (-1.43%); split: -1.44%, +0.01%
Cycles: 37334360 -> 37247936 (-0.23%); split: -0.26%, +0.03%

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4791>

---

 src/amd/compiler/aco_instruction_selection.cpp | 34 ++++++++++++++------------
 src/amd/vulkan/radv_pipeline.c                 |  7 ------
 2 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index b2928c239b3..d1b7da5b5d0 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -1224,7 +1224,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
          /* Don't use s_andn2 here, this allows the optimizer to make a better decision */
          Temp tmp = bld.sop1(Builder::s_not, bld.def(bld.lm), bld.def(s1, scc), src);
          bld.sop2(Builder::s_and, Definition(dst), bld.def(s1, scc), tmp, Operand(exec, bld.lm));
-      } else if (dst.regClass() == v1) {
+      } else if (dst.regClass() == v1 || dst.regClass() == v2b || dst.regClass() == v1b) {
          emit_vop1_instruction(ctx, instr, aco_opcode::v_not_b32, dst);
       } else if (dst.regClass() == v2) {
          Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
@@ -1365,7 +1365,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
    case nir_op_ior: {
       if (instr->dest.dest.ssa.bit_size == 1) {
          emit_boolean_logic(ctx, instr, Builder::s_or, dst);
-      } else if (dst.regClass() == v1) {
+      } else if (dst.regClass() == v1 || dst.regClass() == v2b || dst.regClass() == v1b) {
          emit_vop2_instruction(ctx, instr, aco_opcode::v_or_b32, dst, true);
       } else if (dst.regClass() == v2) {
          emit_vop2_instruction_logic64(ctx, instr, aco_opcode::v_or_b32, dst);
@@ -1381,7 +1381,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
    case nir_op_iand: {
       if (instr->dest.dest.ssa.bit_size == 1) {
          emit_boolean_logic(ctx, instr, Builder::s_and, dst);
-      } else if (dst.regClass() == v1) {
+      } else if (dst.regClass() == v1 || dst.regClass() == v2b || dst.regClass() == v1b) {
          emit_vop2_instruction(ctx, instr, aco_opcode::v_and_b32, dst, true);
       } else if (dst.regClass() == v2) {
          emit_vop2_instruction_logic64(ctx, instr, aco_opcode::v_and_b32, dst);
@@ -1397,7 +1397,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
    case nir_op_ixor: {
       if (instr->dest.dest.ssa.bit_size == 1) {
          emit_boolean_logic(ctx, instr, Builder::s_xor, dst);
-      } else if (dst.regClass() == v1) {
+      } else if (dst.regClass() == v1 || dst.regClass() == v2b || dst.regClass() == v1b) {
          emit_vop2_instruction(ctx, instr, aco_opcode::v_xor_b32, dst, true);
       } else if (dst.regClass() == v2) {
          emit_vop2_instruction_logic64(ctx, instr, aco_opcode::v_xor_b32, dst);
@@ -1527,17 +1527,17 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
       if (dst.regClass() == s1) {
          emit_sop2_instruction(ctx, instr, aco_opcode::s_add_u32, dst, true);
          break;
-      } else if (dst.regClass() == v2b && ctx->program->chip_class < GFX10) {
-         emit_vop2_instruction(ctx, instr, aco_opcode::v_add_u16, dst, true);
-         break;
-      } else if (dst.regClass() == v2b) {
+      } else if (dst.bytes() <= 2 && ctx->program->chip_class >= GFX10) {
          emit_vop3a_instruction(ctx, instr, aco_opcode::v_add_u16_e64, dst);
          break;
+      } else if (dst.bytes() <= 2 && ctx->program->chip_class >= GFX8) {
+         emit_vop2_instruction(ctx, instr, aco_opcode::v_add_u16, dst, true);
+         break;
       }
 
       Temp src0 = get_alu_src(ctx, instr->src[0]);
       Temp src1 = get_alu_src(ctx, instr->src[1]);
-      if (dst.regClass() == v1) {
+      if (dst.type() == RegType::vgpr && dst.bytes() <= 4) {
          bld.vadd32(Definition(dst), Operand(src0), Operand(src1));
          break;
       }
@@ -1649,13 +1649,15 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
       if (dst.regClass() == v1) {
          bld.vsub32(Definition(dst), src0, src1);
          break;
-      } else if (dst.regClass() == v2b) {
+      } else if (dst.bytes() <= 2) {
          if (ctx->program->chip_class >= GFX10)
             bld.vop3(aco_opcode::v_sub_u16_e64, Definition(dst), src0, src1);
          else if (src1.type() == RegType::sgpr)
             bld.vop2(aco_opcode::v_subrev_u16, Definition(dst), src1, as_vgpr(ctx, src0));
-         else
+         else if (ctx->program->chip_class >= GFX8)
             bld.vop2(aco_opcode::v_sub_u16, Definition(dst), src0, as_vgpr(ctx, src1));
+         else
+            bld.vsub32(Definition(dst), src0, src1);
          break;
       }
 
@@ -1714,7 +1716,11 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
       break;
    }
    case nir_op_imul: {
-      if (dst.regClass() == v1) {
+      if (dst.bytes() <= 2 && ctx->program->chip_class >= GFX10) {
+         emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_lo_u16_e64, dst);
+      } else if (dst.bytes() <= 2 && ctx->program->chip_class >= GFX8) {
+         emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_lo_u16, dst, true);
+      } else if (dst.type() == RegType::vgpr) {
          uint32_t src0_ub = get_alu_src_ub(ctx, instr, 0);
          uint32_t src1_ub = get_alu_src_ub(ctx, instr, 1);
 
@@ -1723,10 +1729,6 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
          } else {
             emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_lo_u32, dst);
          }
-      } else if (dst.regClass() == v2b && ctx->program->chip_class >= GFX10) {
-         emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_lo_u16_e64, dst);
-      } else if (dst.regClass() == v2b) {
-         emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_lo_u16, dst, true);
       } else if (dst.regClass() == s1) {
          emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_i32, dst, false);
       } else {
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index f57c54c2a04..c917b11c643 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2957,7 +2957,6 @@ lower_bit_size_callback(const nir_alu_instr *alu, void *_)
 		unsigned bit_size = alu->dest.dest.ssa.bit_size;
 		switch (alu->op) {
 		case nir_op_iabs:
-		case nir_op_iand:
 		case nir_op_bitfield_select:
 		case nir_op_udiv:
 		case nir_op_idiv:
@@ -2966,11 +2965,8 @@ lower_bit_size_callback(const nir_alu_instr *alu, void *_)
 		case nir_op_imul_high:
 		case nir_op_umul_high:
 		case nir_op_ineg:
-		case nir_op_inot:
-		case nir_op_ior:
 		case nir_op_irem:
 		case nir_op_isign:
-		case nir_op_ixor:
 			return 32;
 		case nir_op_imax:
 		case nir_op_umax:
@@ -2979,10 +2975,7 @@ lower_bit_size_callback(const nir_alu_instr *alu, void *_)
 		case nir_op_ishr:
 		case nir_op_ushr:
 		case nir_op_ishl:
-		case nir_op_iadd:
 		case nir_op_uadd_sat:
-		case nir_op_isub:
-		case nir_op_imul:
 			return (bit_size == 8 ||
 			        !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32 : 0;
 		default:



More information about the mesa-commit mailing list