Mesa (master): intel/compiler: don't propagate cmp to add if add is saturated

Sat Jul 11 00:39:32 UTC 2020

Module: Mesa
Branch: master
Commit: 36abb0c6918c89bdd5a155df21dbd7b4ee0a9dde
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=36abb0c6918c89bdd5a155df21dbd7b4ee0a9dde

Author: Yevhenii Kolesnikov <yevhenii.kolesnikov at globallogic.com>
Date:   Thu Mar 12 19:42:37 2020 +0200

intel/compiler: don't propagate cmp to add if add is saturated

>From the Kaby Lake PRM Vol. 7 "Assigning Conditional Flags":

   * Note that the [post condition signal] bits generated at
     the output of a compute are before the .sat.

Paragraph about post_zero does not mention saturation, but
testing it on actual GPUs shows that conditional modifiers
are applied after saturation.

   * post_zero bit: This bit reflects whether the final
     result is zero after all the clamping, normalizing,
     or format conversion logic.

For signed types we don't care about saturation: it won't
change the result of conditional modifier.

For floating and unsigned types there two special cases,
when we can remove inst even if scan_inst is saturated: G
and LE. Since conditional modifiers are just comparations
against zero, saturating positive values to the upper
limit never changes the result of comparation.

For negative values:
(sat(x) >  0) == (x >  0) --- false
(sat(x) <= 0) == (x <= 0) --- true

Closes: https://gitlab.freedesktop.org/mesa/mesa/issues/2610

Signed-off-by: Yevhenii Kolesnikov <yevhenii.kolesnikov at globallogic.com>
Reviewed-by: Matt Turner <mattst88 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4167>

---

 src/intel/compiler/brw_fs_cmod_propagation.cpp  | 32 ++++++++-
 src/intel/compiler/test_fs_cmod_propagation.cpp | 89 +++++++++++++++++++++++++
 2 files changed, 118 insertions(+), 3 deletions(-)

diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp
index 743a435871c..415716d098d 100644
--- a/src/intel/compiler/brw_fs_cmod_propagation.cpp
+++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp
@@ -93,18 +93,44 @@ cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block,
              scan_inst->flags_written() != flags_written)
             goto not_match;
 
-         /* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods":
+         /* From the Kaby Lake PRM Vol. 7 "Assigning Conditional Flags":
           *
           *    * Note that the [post condition signal] bits generated at
           *      the output of a compute are before the .sat.
           *
-          * So we don't have to bail if scan_inst has saturate.
+          * Paragraph about post_zero does not mention saturation, but
+          * testing it on actual GPUs shows that conditional modifiers
+          * are applied after saturation.
+          *
+          *    * post_zero bit: This bit reflects whether the final
+          *      result is zero after all the clamping, normalizing,
+          *      or format conversion logic.
+          *
+          * For signed types we don't care about saturation: it won't
+          * change the result of conditional modifier.
+          *
+          * For floating and unsigned types there two special cases,
+          * when we can remove inst even if scan_inst is saturated: G
+          * and LE. Since conditional modifiers are just comparations
+          * against zero, saturating positive values to the upper
+          * limit never changes the result of comparation.
+          *
+          * For negative values:
+          * (sat(x) >  0) == (x >  0) --- false
+          * (sat(x) <= 0) == (x <= 0) --- true
           */
-         /* Otherwise, try propagating the conditional. */
          const enum brw_conditional_mod cond =
             negate ? brw_swap_cmod(inst->conditional_mod)
             : inst->conditional_mod;
 
+         if (scan_inst->saturate &&
+             (brw_reg_type_is_floating_point(scan_inst->dst.type) ||
+              type_is_unsigned_int(scan_inst->dst.type)) &&
+             (cond != BRW_CONDITIONAL_G &&
+              cond != BRW_CONDITIONAL_LE))
+            goto not_match;
+
+         /* Otherwise, try propagating the conditional. */
          if (scan_inst->can_do_cmod() &&
              ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
               scan_inst->conditional_mod == cond)) {
diff --git a/src/intel/compiler/test_fs_cmod_propagation.cpp b/src/intel/compiler/test_fs_cmod_propagation.cpp
index 0b634a371c8..af9489e2dbc 100644
--- a/src/intel/compiler/test_fs_cmod_propagation.cpp
+++ b/src/intel/compiler/test_fs_cmod_propagation.cpp
@@ -2381,3 +2381,92 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_mismatch_flag_read)
    EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
    EXPECT_EQ(1, instruction(block0, 1)->flag_subreg);
 }
+
+TEST_F(cmod_propagation_test, cmp_to_add_float_e)
+{
+   const fs_builder &bld = v->bld;
+   fs_reg dest = v->vgrf(glsl_type::float_type);
+   fs_reg src0 = v->vgrf(glsl_type::float_type);
+   fs_reg neg10(brw_imm_f(-10.0f));
+   fs_reg pos10(brw_imm_f(10.0f));
+
+   bld.ADD(dest, src0, neg10)->saturate = true;
+   bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_EQ);
+
+   /* = Before =
+    * 0: add.sat(8) vgrf0:F, vgrf1:F, -10f
+    * 1: cmp.z.f0.0(8) null:F, vgrf1:F, 10f
+    *
+    * = After =
+    * (no changes)
+    */
+
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_FALSE(cmod_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_EQ, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, cmp_to_add_float_g)
+{
+   const fs_builder &bld = v->bld;
+   fs_reg dest = v->vgrf(glsl_type::float_type);
+   fs_reg src0 = v->vgrf(glsl_type::float_type);
+   fs_reg neg10(brw_imm_f(-10.0f));
+   fs_reg pos10(brw_imm_f(10.0f));
+
+   bld.ADD(dest, src0, neg10)->saturate = true;
+   bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_G);
+
+   /* = Before =
+    * 0: add.sat(8) vgrf0:F, vgrf1:F, -10f
+    * 1: cmp.g.f0.0(8) null:F, vgrf1:F, 10f
+    *
+    * = After =
+    * 0: add.sat.g.f0.0(8) vgrf0:F, vgrf1:F, -10f
+    */
+
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_TRUE(cmod_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(0, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_G, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, cmp_to_add_float_le)
+{
+   const fs_builder &bld = v->bld;
+   fs_reg dest = v->vgrf(glsl_type::float_type);
+   fs_reg src0 = v->vgrf(glsl_type::float_type);
+   fs_reg neg10(brw_imm_f(-10.0f));
+   fs_reg pos10(brw_imm_f(10.0f));
+
+   bld.ADD(dest, src0, neg10)->saturate = true;
+   bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_LE);
+
+   /* = Before =
+    * 0: add.sat(8) vgrf0:F, vgrf1:F, -10f
+    * 1: cmp.le.f0.0(8) null:F, vgrf1:F, 10f
+    *
+    * = After =
+    * 0: add.sat.le.f0.0(8) vgrf0:F, vgrf1:F, -10f
+    */
+
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_TRUE(cmod_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(0, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_LE, instruction(block0, 0)->conditional_mod);
+}