Mesa (staging/20.0): intel/compiler: fix cmod propagation optimisations
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Jun 3 19:22:00 UTC 2020
Module: Mesa
Branch: staging/20.0
Commit: 8c35c65f669706dcc5debad0a3b08f26163933c4
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8c35c65f669706dcc5debad0a3b08f26163933c4
Author: Yevhenii Kolesnikov <yevhenii.kolesnikov at globallogic.com>
Date: Fri Jan 3 16:37:00 2020 +0200
intel/compiler: fix cmod propagation optimisations
Given the following:
- CMP writes to the flag register the result of
applying cmod to `src0 - src1`.
After that it stores the same value to dst.
Other instructions first store their result to
dst, and then store cmod(dst) to the flag
register.
- inst is either CMP or MOV
- inst->dst is null
- inst->src[0] overlaps with scan_inst->dst
- inst->src[1] is zero
- scan_inst wrote to a flag register
There can be three possible paths:
- scan_inst is CMP:
Considering that src0 is either 0x0 (false),
or 0xffffffff (true), and src1 is 0x0:
- If inst's cmod is NZ, we can always remove
inst: NZ is invariant for false and true. This
holds even if src0 is NaN: .nz is the only cmod
that returns true for NaN.
- .g is invariant if src0 has a UD type
- .l is invariant if src0 has a D type
- scan_inst and inst have the same cmod:
If scan_inst is anything other than CMP, it already
wrote the appropriate value to the flag register.
- else:
We can change cmod of scan_inst to that of inst,
and remove inst. It is valid as long as we make
sure that no instruction uses the flag register
between scan_inst and inst.
Nine new cmod_propagation unit tests:
- cmp_cmpnz
- cmp_cmpg
- plnnz_cmpnz
- plnnz_cmpz (*)
- plnnz_sel_cmpz
- cmp_cmpg_D
- cmp_cmpg_UD (*)
- cmp_cmpl_D (*)
- cmp_cmpl_UD
(*) this would fail without changes to brw_fs_cmod_propagation.
This fixes an optimisation that used to be illegal (see issue #2154):
= Before =
0: linterp.z.f0.0(8) vgrf0:F, g2:F, attr0<0>:F
1: cmp.nz.f0.0(8) null:F, vgrf0:F, 0f
= After =
0: linterp.z.f0.0(8) vgrf0:F, g2:F, attr0<0>:F
Now it is optimised as follows (note the change of cmod in line 0):
= Before =
0: linterp.z.f0.0(8) vgrf0:F, g2:F, attr0<0>:F
1: cmp.nz.f0.0(8) null:F, vgrf0:F, 0f
= After =
0: linterp.nz.f0.0(8) vgrf0:F, g2:F, attr0<0>:F
No shaderdb changes
Closes: https://gitlab.freedesktop.org/mesa/mesa/issues/2154
Signed-off-by: Yevhenii Kolesnikov <yevhenii.kolesnikov at globallogic.com>
Reviewed-by: Matt Turner <mattst88 at gmail.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3348>
(cherry picked from commit 32b7ba66b0156d9fd40b059f20da79a74451f7fd)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5149>
---
src/intel/compiler/brw_fs_cmod_propagation.cpp | 70 +++++-
src/intel/compiler/test_fs_cmod_propagation.cpp | 275 ++++++++++++++++++++++++
2 files changed, 336 insertions(+), 9 deletions(-)
diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp
index 45ea9206014..8f407563d74 100644
--- a/src/intel/compiler/brw_fs_cmod_propagation.cpp
+++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp
@@ -326,17 +326,69 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)
}
}
- /* If the instruction generating inst's source also wrote the
- * flag, and inst is doing a simple .nz comparison, then inst
- * is redundant - the appropriate value is already in the flag
- * register. Delete inst.
+ /* Given the following:
+ * - CMP writes to flag register the result of
+ * applying cmod to the `src0 - src1`.
+ * After that it stores the same value to dst.
+ * Other instructions first store their result to
+ * dst, and then store cmod(dst) to the flag
+ * register.
+ * - inst is either CMP or MOV
+ * - inst->dst is null
+ * - inst->src[0] overlaps with scan_inst->dst
+ * - inst->src[1] is zero
+ * - scan_inst wrote to a flag register
+ *
+ * There can be three possible paths:
+ *
+ * - scan_inst is CMP:
+ *
+ * Considering that src0 is either 0x0 (false),
+ * or 0xffffffff (true), and src1 is 0x0:
+ *
+ * - If inst's cmod is NZ, we can always remove
+ * inst: NZ is invariant for false and true. This
+ * holds even if src0 is NaN: .nz is the only cmod
+ * that returns true for NaN.
+ *
+ * - .g is invariant if src0 has a UD type
+ *
+ * - .l is invariant if src0 has a D type
+ *
+ * - scan_inst and inst have the same cmod:
+ *
+ * If scan_inst is anything other than CMP, it already
+ * wrote the appropriate value to the flag register.
+ *
+ * - else:
+ *
+ * We can change cmod of scan_inst to that of inst,
+ * and remove inst. It is valid as long as we make
+ * sure that no instruction uses the flag register
+ * between scan_inst and inst.
*/
- if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
- !inst->src[0].negate &&
+ if (!inst->src[0].negate &&
scan_inst->flags_written()) {
- inst->remove(block);
- progress = true;
- break;
+ if (scan_inst->opcode == BRW_OPCODE_CMP) {
+ if ((inst->conditional_mod == BRW_CONDITIONAL_NZ) ||
+ (inst->conditional_mod == BRW_CONDITIONAL_G &&
+ inst->src[0].type == BRW_REGISTER_TYPE_UD) ||
+ (inst->conditional_mod == BRW_CONDITIONAL_L &&
+ inst->src[0].type == BRW_REGISTER_TYPE_D)) {
+ inst->remove(block);
+ progress = true;
+ break;
+ }
+ } else if (scan_inst->conditional_mod == inst->conditional_mod) {
+ inst->remove(block);
+ progress = true;
+ break;
+ } else if (!read_flag) {
+ scan_inst->conditional_mod = inst->conditional_mod;
+ inst->remove(block);
+ progress = true;
+ break;
+ }
}
/* The conditional mod of the CMP/CMPN instructions behaves
diff --git a/src/intel/compiler/test_fs_cmod_propagation.cpp b/src/intel/compiler/test_fs_cmod_propagation.cpp
index d05c6897196..bf8a12647c0 100644
--- a/src/intel/compiler/test_fs_cmod_propagation.cpp
+++ b/src/intel/compiler/test_fs_cmod_propagation.cpp
@@ -646,6 +646,281 @@ TEST_F(cmod_propagation_test, andnz_non_one)
EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod);
}
+TEST_F(cmod_propagation_test, cmp_cmpnz)
+{
+ const fs_builder &bld = v->bld;
+
+ fs_reg dst0 = v->vgrf(glsl_type::float_type);
+ fs_reg src0 = v->vgrf(glsl_type::float_type);
+ fs_reg zero(brw_imm_f(0));
+
+ bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
+ bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_NZ);
+
+ /* = Before =
+ * 0: cmp.nz.f0.0(8) vgrf0:F, vgrf1:F, 0f
+ * 1: cmp.nz.f0.0(8) null:F, vgrf0:F, 0f
+ *
+ * = After =
+ * 0: cmp.nz.f0.0(8) vgrf0:F, vgrf1:F, 0f
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_TRUE(cmod_propagation(v));
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, cmp_cmpg)
+{
+ const fs_builder &bld = v->bld;
+
+ fs_reg dst0 = v->vgrf(glsl_type::float_type);
+ fs_reg src0 = v->vgrf(glsl_type::float_type);
+ fs_reg zero(brw_imm_f(0));
+
+ bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
+ bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_G);
+
+ /* = Before =
+ * 0: cmp.nz.f0.0(8) vgrf0:F, vgrf1:F, 0f
+ * 1: cmp.g.f0.0(8) null:F, vgrf0:F, 0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_FALSE(cmod_propagation(v));
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_G, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, plnnz_cmpnz)
+{
+ const fs_builder &bld = v->bld;
+
+ fs_reg dst0 = v->vgrf(glsl_type::float_type);
+ fs_reg src0 = v->vgrf(glsl_type::float_type);
+ fs_reg zero(brw_imm_f(0));
+
+ set_condmod(BRW_CONDITIONAL_NZ, bld.PLN(dst0, src0, zero));
+ bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_NZ);
+
+ /* = Before =
+ * 0: pln.nz.f0.0(8) vgrf0:F, vgrf1:F, 0f
+ * 1: cmp.nz.f0.0(8) null:F, vgrf0:F, 0f
+ *
+ * = After =
+ * 0: pln.nz.f0.0(8) vgrf0:F, vgrf1:F, 0f
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_TRUE(cmod_propagation(v));
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_PLN, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, plnnz_cmpz)
+{
+ const fs_builder &bld = v->bld;
+
+ fs_reg dst0 = v->vgrf(glsl_type::float_type);
+ fs_reg src0 = v->vgrf(glsl_type::float_type);
+ fs_reg zero(brw_imm_f(0));
+
+ set_condmod(BRW_CONDITIONAL_NZ, bld.PLN(dst0, src0, zero));
+ bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_Z);
+
+ /* = Before =
+ * 0: pln.nz.f0.0(8) vgrf0:F, vgrf1:F, 0f
+ * 1: cmp.z.f0.0(8) null:F, vgrf0:F, 0f
+ *
+ * = After =
+ * 0: pln.z.f0.0(8) vgrf0:F, vgrf1:F, 0f
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_TRUE(cmod_propagation(v));
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_PLN, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_Z, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, plnnz_sel_cmpz)
+{
+ const fs_builder &bld = v->bld;
+
+ fs_reg dst0 = v->vgrf(glsl_type::float_type);
+ fs_reg dst1 = v->vgrf(glsl_type::float_type);
+ fs_reg src0 = v->vgrf(glsl_type::float_type);
+ fs_reg zero(brw_imm_f(0));
+
+ set_condmod(BRW_CONDITIONAL_NZ, bld.PLN(dst0, src0, zero));
+ set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dst1, src0, zero));
+ bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_Z);
+
+ /* = Before =
+ * 0: pln.nz.f0.0(8) vgrf0:F, vgrf2:F, 0f
+ * 1: (+f0.0) sel(8) vgrf1:F, vgrf2:F, 0f
+ * 2: cmp.z.f0.0(8) null:F, vgrf0:F, 0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_FALSE(cmod_propagation(v));
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(2, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_PLN, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_Z, instruction(block0, 2)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, cmp_cmpg_D)
+{
+ const fs_builder &bld = v->bld;
+
+ fs_reg dst0 = v->vgrf(glsl_type::int_type);
+ fs_reg src0 = v->vgrf(glsl_type::int_type);
+ fs_reg zero(brw_imm_d(0));
+ fs_reg one(brw_imm_d(1));
+
+ bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
+ bld.CMP(bld.null_reg_d(), dst0, zero, BRW_CONDITIONAL_G);
+
+ /* = Before =
+ * 0: cmp.nz.f0.0(8) vgrf0:D, vgrf1:D, 0d
+ * 1: cmp.g.f0.0(8) null:D, vgrf0:D, 0d
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_FALSE(cmod_propagation(v));
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_G, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, cmp_cmpg_UD)
+{
+ const fs_builder &bld = v->bld;
+
+ fs_reg dst0 = v->vgrf(glsl_type::uint_type);
+ fs_reg src0 = v->vgrf(glsl_type::uint_type);
+ fs_reg zero(brw_imm_ud(0));
+
+ bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
+ bld.CMP(bld.null_reg_ud(), dst0, zero, BRW_CONDITIONAL_G);
+
+ /* = Before =
+ * 0: cmp.nz.f0.0(8) vgrf0:UD, vgrf1:UD, 0u
+ * 1: cmp.g.f0.0(8) null:UD, vgrf0:UD, 0u
+ *
+ * = After =
+ * 0: cmp.nz.f0.0(8) vgrf0:UD, vgrf1:UD, 0u
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_TRUE(cmod_propagation(v));
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, cmp_cmpl_D)
+{
+ const fs_builder &bld = v->bld;
+
+ fs_reg dst0 = v->vgrf(glsl_type::int_type);
+ fs_reg src0 = v->vgrf(glsl_type::int_type);
+ fs_reg zero(brw_imm_d(0));
+
+ bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
+ bld.CMP(bld.null_reg_d(), dst0, zero, BRW_CONDITIONAL_L);
+
+ /* = Before =
+ * 0: cmp.nz.f0.0(8) vgrf0:D, vgrf1:D, 0d
+ * 1: cmp.l.f0.0(8) null:D, vgrf0:D, 0d
+ *
+ * = After =
+ * 0: cmp.nz.f0.0(8) vgrf0:D, vgrf1:D, 0d
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_TRUE(cmod_propagation(v));
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, cmp_cmpl_UD)
+{
+ const fs_builder &bld = v->bld;
+
+ fs_reg dst0 = v->vgrf(glsl_type::uint_type);
+ fs_reg src0 = v->vgrf(glsl_type::uint_type);
+ fs_reg zero(brw_imm_ud(0));
+
+ bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ);
+ bld.CMP(bld.null_reg_ud(), dst0, zero, BRW_CONDITIONAL_L);
+
+ /* = Before =
+ * 0: cmp.nz.f0.0(8) vgrf0:UD, vgrf1:UD, 0u
+ * 1: cmp.l.f0.0(8) null:UD, vgrf0:UD, 0u
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_FALSE(cmod_propagation(v));
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 1)->conditional_mod);
+}
+
TEST_F(cmod_propagation_test, andz_one)
{
const fs_builder &bld = v->bld;
More information about the mesa-commit
mailing list