Mesa (staging/20.2): aco: fix combine_constant_comparison_ordering() NaN check with 16/64-bit

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Dec 3 18:46:23 UTC 2020


Module: Mesa
Branch: staging/20.2
Commit: 7ea1f6ff78e1e8a2f090e67cdfea7cfe2a09a947
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7ea1f6ff78e1e8a2f090e67cdfea7cfe2a09a947

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Wed Oct  7 11:40:45 2020 +0100

aco: fix combine_constant_comparison_ordering() NaN check with 16/64-bit

No fossil-db changes.

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7045>

---

 .pick_status.json                         |  2 +-
 src/amd/compiler/aco_optimizer.cpp        | 46 +++++++++++++--------
 src/amd/compiler/tests/test_optimizer.cpp | 66 +++++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+), 17 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 141f4077171..873383191f5 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -8176,7 +8176,7 @@
         "description": "aco: fix combine_constant_comparison_ordering() NaN check with 16/64-bit",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 3,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index a76ef116f9e..1eeef1f5bb9 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -1728,6 +1728,31 @@ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr)
    return true;
 }
 
+bool is_operand_constant(opt_ctx &ctx, Operand op, unsigned bit_size, uint64_t *value) /* returns true and stores the 64-bit constant in *value when op is a known constant */
+{
+   if (op.isConstant()) { /* the operand is itself a constant */
+      *value = op.constantValue64();
+      return true;
+   } else if (op.isTemp()) { /* a temporary: look up what ctx.info recorded for its original id */
+      unsigned id = original_temp_id(ctx, op.getTemp());
+      if (!ctx.info[id].is_constant_or_literal(bit_size))
+         return false;
+      *value = get_constant_op(ctx, ctx.info[id], bit_size).constantValue64();
+      return true;
+   }
+   return false; /* neither a constant nor a temporary; *value is left unwritten */
+}
+
+bool is_constant_nan(uint64_t value, unsigned bit_size) /* value is treated as the raw bit pattern of a bit_size-wide float */
+{
+   if (bit_size == 16)
+      return ((value >> 10) & 0x1f) == 0x1f && (value & 0x3ff); /* half: exponent bits 10-14 all set, mantissa bits 0-9 non-zero */
+   else if (bit_size == 32)
+      return ((value >> 23) & 0xff) == 0xff && (value & 0x7fffff); /* single: exponent bits 23-30 all set, mantissa bits 0-22 non-zero */
+   else /* every other bit_size falls through to the 64-bit layout */
+      return ((value >> 52) & 0x7ff) == 0x7ff && (value & 0xfffffffffffff); /* double: exponent bits 52-62 all set, mantissa bits 0-51 non-zero */
+}
+
 /* s_or_b64(v_cmp_neq_f32(a, a), cmp(a, #b)) and b is not NaN -> get_unordered(cmp)(a, b)
  * s_and_b64(v_cmp_eq_f32(a, a), cmp(a, #b)) and b is not NaN -> get_ordered(cmp)(a, b) */
 bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr)
@@ -1751,7 +1776,8 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
    else if (get_f32_cmp(nan_test->opcode) != expected_nan_test)
       return false;
 
-   if (!is_cmp(cmp->opcode) || get_cmp_bitsize(cmp->opcode) != get_cmp_bitsize(nan_test->opcode))
+   unsigned bit_size = get_cmp_bitsize(cmp->opcode);
+   if (!is_cmp(cmp->opcode) || get_cmp_bitsize(nan_test->opcode) != bit_size)
       return false;
 
    if (!nan_test->operands[0].isTemp() || !nan_test->operands[1].isTemp())
@@ -1780,22 +1806,10 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
    if (constant_operand == -1)
       return false;
 
-   uint32_t constant;
-   if (cmp->operands[constant_operand].isConstant()) {
-      constant = cmp->operands[constant_operand].constantValue();
-   } else if (cmp->operands[constant_operand].isTemp()) {
-      Temp tmp = cmp->operands[constant_operand].getTemp();
-      unsigned id = original_temp_id(ctx, tmp);
-      if (!ctx.info[id].is_constant_or_literal(32))
-         return false;
-      constant = ctx.info[id].val;
-   } else {
+   uint64_t constant_value;
+   if (!is_operand_constant(ctx, cmp->operands[constant_operand], bit_size, &constant_value))
       return false;
-   }
-
-   float constantf;
-   memcpy(&constantf, &constant, 4);
-   if (isnan(constantf))
+   if (is_constant_nan(constant_value, bit_size))
       return false;
 
    if (cmp->operands[0].isTemp())
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index f10b63de6f6..d76cc37f8b1 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -115,3 +115,69 @@ BEGIN_TEST(optimize.clamp)
 
    finish_opt_test();
 END_TEST
+
+BEGIN_TEST(optimize.const_comparison_ordering)
+   //>> v1: %a, v1: %b, v2: %c, v1: %d, s2: %_:exec = p_startpgm
+   if (!setup_cs("v1 v1 v2 v1", GFX9))
+      return;
+
+   /* optimize to unordered comparison */
+   //! s2: %res0 = v_cmp_nge_f32 4.0, %a
+   //! p_unit_test 0, %res0
+   writeout(0, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc),
+                        bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
+                        bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0])));
+
+   //! s2: %res1 = v_cmp_nge_f32 4.0, %a
+   //! p_unit_test 1, %res1
+   writeout(1, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc),
+                        bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
+                        bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0])));
+
+   //! s2: %res2 = v_cmp_nge_f32 0x40a00000, %a
+   //! p_unit_test 2, %res2
+   writeout(2, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc),
+                        bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
+                        bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), bld.copy(bld.def(v1), Operand(0x40a00000u)), inputs[0])));
+
+   /* optimize to ordered comparison */
+   //! s2: %res3 = v_cmp_lt_f32 4.0, %a
+   //! p_unit_test 3, %res3
+   writeout(3, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc),
+                        bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
+                        bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0])));
+
+   //! s2: %res4 = v_cmp_lt_f32 4.0, %a
+   //! p_unit_test 4, %res4
+   writeout(4, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc),
+                        bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
+                        bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0])));
+
+   //! s2: %res5 = v_cmp_lt_f32 0x40a00000, %a
+   //! p_unit_test 5, %res5
+   writeout(5, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc),
+                        bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
+                        bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), bld.copy(bld.def(v1), Operand(0x40a00000u)), inputs[0])));
+
+   /* NaN constants: the expected output below keeps all three instructions, i.e. no combine happens */
+   uint16_t nan16 = 0x7e00; /* 16-bit pattern: exponent bits all set, mantissa non-zero */
+   uint32_t nan32 = 0x7fc00000; /* 32-bit pattern: exponent bits all set, mantissa non-zero */
+
+   //! s2: %tmp6_0 = v_cmp_lt_f16 0x7e00, %a
+   //! s2: %tmp6_1 = v_cmp_neq_f16 %a, %a
+   //! s2: %res6, s1: %_:scc = s_or_b64 %tmp6_1, %tmp6_0
+   //! p_unit_test 6, %res6
+   writeout(6, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc),
+                         bld.vopc(aco_opcode::v_cmp_neq_f16, bld.def(bld.lm), inputs[0], inputs[0]),
+                         bld.vopc(aco_opcode::v_cmp_lt_f16, bld.def(bld.lm), Operand(nan16), inputs[0])));
+
+   //! s2: %tmp7_0 = v_cmp_lt_f32 0x7fc00000, %a
+   //! s2: %tmp7_1 = v_cmp_neq_f32 %a, %a
+   //! s2: %res7, s1: %_:scc = s_or_b64 %tmp7_1, %tmp7_0
+   //! p_unit_test 7, %res7
+   writeout(7, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc),
+                         bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
+                         bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(nan32), inputs[0])));
+
+   finish_opt_test();
+END_TEST



More information about the mesa-commit mailing list