Mesa (master): aco: improve fsign selection

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Sep 8 12:32:14 UTC 2020


Module: Mesa
Branch: master
Commit: 6049dc1a9d5cb1a3dae063e52409028213d5492a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=6049dc1a9d5cb1a3dae063e52409028213d5492a

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Thu Sep  3 14:56:26 2020 +0100

aco: improve fsign selection

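Idea from https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6284

For 16-bit and 32-bit results, the two compare/v_cndmask_b32 pairs are
replaced by a shorter sequence: add zero to the source (turning negative
zero into positive zero), clamp the bit pattern as a signed integer to
[-1, 1] with v_med3, and convert that integer back to floating point.
The 64-bit path is unchanged.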

fossil-db (Navi):
Totals from 4053 (2.95% of 137413) affected shaders:
SGPRs: 305810 -> 305906 (+0.03%); split: -0.01%, +0.04%
VGPRs: 249000 -> 249144 (+0.06%); split: -0.01%, +0.07%
CodeSize: 29967092 -> 29885768 (-0.27%); split: -0.27%, +0.00%
Instrs: 5749494 -> 5737971 (-0.20%); split: -0.20%, +0.00%
Cycles: 255028584 -> 254955444 (-0.03%); split: -0.04%, +0.01%

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6583>

---

 src/amd/compiler/aco_instruction_selection.cpp | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 1d543073f1b..2a2dc0687df 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2127,17 +2127,15 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
    case nir_op_fsign: {
       Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
       if (dst.regClass() == v2b) {
-         Temp one = bld.copy(bld.def(v1), Operand(0x3c00u));
-         Temp minus_one = bld.copy(bld.def(v1), Operand(0xbc00u));
-         Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f16, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
-         src = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), one, src, cond);
-         cond = bld.vopc(aco_opcode::v_cmp_le_f16, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
-         bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), minus_one, src, cond);
+         assert(ctx->program->chip_class >= GFX9);
+         /* replace negative zero with positive zero */
+         src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand(0u), src);
+         src = bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand((uint16_t)-1), src, Operand((uint16_t)1u));
+         bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src);
       } else if (dst.regClass() == v1) {
-         Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f32, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
-         src = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0x3f800000u), src, cond);
-         cond = bld.vopc(aco_opcode::v_cmp_le_f32, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
-         bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), Operand(0xbf800000u), src, cond);
+         src = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand(0u), src);
+         src = bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand((uint32_t)-1), src, Operand(1u));
+         bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src);
       } else if (dst.regClass() == v2) {
          Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
          Temp tmp = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(0x3FF00000u));



More information about the mesa-commit mailing list