Mesa (master): aco/tests: add output modifier tests

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Nov 16 13:18:01 UTC 2020


Module: Mesa
Branch: master
Commit: 2736f974962c293ce76dcf309ab78dfb4b198516
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2736f974962c293ce76dcf309ab78dfb4b198516

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Fri Nov 13 15:12:35 2020 +0000

aco/tests: add output modifier tests

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7605>

---

 src/amd/compiler/tests/test_optimizer.cpp | 170 ++++++++++++++++++++++++++++++
 1 file changed, 170 insertions(+)

diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index 1ad0e92255f..84f77aeaed8 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -81,6 +81,176 @@ BEGIN_TEST(optimize.neg)
    }
 END_TEST
 
+BEGIN_TEST(optimize.output_modifiers)
+   //>> v1: %a, v1: %b, s2: %_:exec = p_startpgm
+   if (!setup_cs("v1 v1", GFX9))
+      return;
+
+   program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
+
+   /* 32-bit modifiers */
+
+   //! v1: %res0 = v_add_f32 %a, %b *0.5
+   //! p_unit_test 0, %res0
+   Temp tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
+   writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x3f000000u), tmp));
+
+   //! v1: %res1 = v_add_f32 %a, %b *2
+   //! p_unit_test 1, %res1
+   tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
+   writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp));
+
+   //! v1: %res2 = v_add_f32 %a, %b *4
+   //! p_unit_test 2, %res2
+   tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
+   writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40800000u), tmp));
+
+   //! v1: %res3 = v_add_f32 %a, %b clamp
+   //! p_unit_test 3, %res3
+   tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
+   writeout(3, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand(0u), Operand(0x3f800000u), tmp));
+
+   //! v1: %res4 = v_add_f32 %a, %b *2 clamp
+   //! p_unit_test 4, %res4
+   tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
+   tmp = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp);
+   writeout(4, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand(0u), Operand(0x3f800000u), tmp));
+
+   /* 16-bit modifiers */
+
+   //! v2b: %res5 = v_add_f16 %a, %b *0.5
+   //! p_unit_test 5, %res5
+   tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
+   writeout(5, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x3800u), tmp));
+
+   //! v2b: %res6 = v_add_f16 %a, %b *2
+   //! p_unit_test 6, %res6
+   tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
+   writeout(6, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x4000u), tmp));
+
+   //! v2b: %res7 = v_add_f16 %a, %b *4
+   //! p_unit_test 7, %res7
+   tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
+   writeout(7, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x4400u), tmp));
+
+   //! v2b: %res8 = v_add_f16 %a, %b clamp
+   //! p_unit_test 8, %res8
+   tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
+   writeout(8, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand((uint16_t)0u), Operand((uint16_t)0x3c00u), tmp));
+
+   //! v2b: %res9 = v_add_f16 %a, %b *2 clamp
+   //! p_unit_test 9, %res9
+   tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
+   tmp = bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x4000), tmp);
+   writeout(9, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand((uint16_t)0u), Operand((uint16_t)0x3c00u), tmp));
+
+   /* clamping is done after omod */
+
+   //! v1: %res10_tmp = v_add_f32 %a, %b clamp
+   //! v1: %res10 = v_mul_f32 2.0, %res10_tmp
+   //! p_unit_test 10, %res10
+   tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
+   tmp = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand(0u), Operand(0x3f800000u), tmp);
+   writeout(10, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp));
+
+   /* unsupported instructions */
+
+   //! v1: %res11_tmp = v_xor_b32 %a, %b
+   //! v1: %res11 = v_mul_f32 2.0, %res11_tmp
+   //! p_unit_test 11, %res11
+   tmp = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), inputs[0], inputs[1]);
+   writeout(11, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp));
+
+   /* several users */
+
+   //! v1: %res12_tmp = v_add_f32 %a, %b
+   //! p_unit_test %res12_tmp
+   //! v1: %res12 = v_mul_f32 2.0, %res12_tmp
+   //! p_unit_test 12, %res12
+   tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
+   bld.pseudo(aco_opcode::p_unit_test, tmp);
+   writeout(12, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp));
+
+   //! v1: %res13 = v_add_f32 %a, %b
+   //! p_unit_test 13, %res13
+   tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
+   bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp);
+   writeout(13, tmp);
+
+   /* omod has no effect if denormals are enabled but clamp is fine */
+
+   //>> BB1
+   //! /* logical preds: / linear preds: / kind: uniform, */
+   program->next_fp_mode.denorm32 = fp_denorm_keep;
+   program->next_fp_mode.denorm16_64 = fp_denorm_flush;
+   bld.reset(program->create_and_insert_block());
+
+   //! v1: %res14_tmp = v_add_f32 %a, %b
+   //! v1: %res14 = v_mul_f32 2.0, %res14_tmp
+   //! p_unit_test 14, %res14
+   tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
+   writeout(14, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp));
+
+   //! v1: %res15 = v_add_f32 %a, %b clamp
+   //! p_unit_test 15, %res15
+   tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
+   writeout(15, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand(0u), Operand(0x3f800000u), tmp));
+
+   //>> BB2
+   //! /* logical preds: / linear preds: / kind: uniform, */
+   program->next_fp_mode.denorm32 = fp_denorm_flush;
+   program->next_fp_mode.denorm16_64 = fp_denorm_keep;
+   bld.reset(program->create_and_insert_block());
+
+   //! v2b: %res16_tmp = v_add_f16 %a, %b
+   //! v2b: %res16 = v_mul_f16 2.0, %res16_tmp
+   //! p_unit_test 16, %res16
+   tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
+   writeout(16, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x4000u), tmp));
+
+   //! v2b: %res17 = v_add_f16 %a, %b clamp
+   //! p_unit_test 17, %res17
+   tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
+   writeout(17, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand((uint16_t)0u), Operand((uint16_t)0x3c00u), tmp));
+
+   /* omod flushes -0.0 to +0.0 */
+
+   //>> BB3
+   //! /* logical preds: / linear preds: / kind: uniform, */
+   program->next_fp_mode.denorm32 = fp_denorm_keep;
+   program->next_fp_mode.denorm16_64 = fp_denorm_keep;
+   program->next_fp_mode.preserve_signed_zero_inf_nan32 = true;
+   program->next_fp_mode.preserve_signed_zero_inf_nan16_64 = false;
+   bld.reset(program->create_and_insert_block());
+
+   //! v1: %res18_tmp = v_add_f32 %a, %b
+   //! v1: %res18 = v_mul_f32 2.0, %res18_tmp
+   //! p_unit_test 18, %res18
+   tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
+   writeout(18, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp));
+   //! v1: %res19 = v_add_f32 %a, %b clamp
+   //! p_unit_test 19, %res19
+   tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
+   writeout(19, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand(0u), Operand(0x3f800000u), tmp));
+
+   //>> BB4
+   //! /* logical preds: / linear preds: / kind: uniform, */
+   program->next_fp_mode.preserve_signed_zero_inf_nan32 = false;
+   program->next_fp_mode.preserve_signed_zero_inf_nan16_64 = true;
+   bld.reset(program->create_and_insert_block());
+   //! v2b: %res20_tmp = v_add_f16 %a, %b
+   //! v2b: %res20 = v_mul_f16 2.0, %res20_tmp
+   //! p_unit_test 20, %res20
+   tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
+   writeout(20, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x4000u), tmp));
+   //! v2b: %res21 = v_add_f16 %a, %b clamp
+   //! p_unit_test 21, %res21
+   tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
+   writeout(21, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand((uint16_t)0u), Operand((uint16_t)0x3c00u), tmp));
+
+   finish_opt_test();
+END_TEST
+
 Temp create_subbrev_co(Operand op0, Operand op1, Operand op2)
 {
    return bld.vop2_e64(aco_opcode::v_subbrev_co_u32, bld.def(v1), bld.hint_vcc(bld.def(bld.lm)), op0, op1, op2);



More information about the mesa-commit mailing list