Mesa (master): aco: use full-register instructions to implement subdword packing on GFX6/7

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Jun 9 21:50:58 UTC 2020


Module: Mesa
Branch: master
Commit: 942e3c40c30301cc47303bc879fe3b4cca3c8bfd
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=942e3c40c30301cc47303bc879fe3b4cca3c8bfd

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Wed May  6 11:58:02 2020 +0100

aco: use full-register instructions to implement subdword packing on GFX6/7

On GFX6/7, there are no SDWA instructions, so subdword copies and swaps
are emulated with full-register shift, mask and or instructions.

Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5226>

---

 src/amd/compiler/aco_lower_to_hw_instr.cpp | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index b12dc4b3798..da78ab4b649 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -1042,6 +1042,29 @@ bool do_copy(lower_context* ctx, Builder& bld, const copy_operation& copy, bool
          *preserve_scc = true;
       } else if (def.bytes() == 8 && def.getTemp().type() == RegType::sgpr) {
          bld.sop1(aco_opcode::s_mov_b64, def, Operand(op.physReg(), s2));
+      } else if (def.regClass().is_subdword() && ctx->program->chip_class < GFX8) {
+         if (op.physReg().byte()) {
+            assert(def.physReg().byte() == 0);
+            bld.vop2(aco_opcode::v_lshrrev_b32, def, Operand(op.physReg().byte() * 8), op);
+         } else if (def.physReg().byte() == 2) {
+            assert(op.physReg().byte() == 0);
+            /* preserve the target's lower half */
+            def = Definition(def.physReg().advance(-2), v1);
+            bld.vop2(aco_opcode::v_and_b32, Definition(op.physReg(), v1), Operand(0xFFFFu), op);
+            if (def.physReg().reg() != op.physReg().reg())
+               bld.vop2(aco_opcode::v_and_b32, def, Operand(0xFFFFu), Operand(def.physReg(), v2b));
+            bld.vop2(aco_opcode::v_cvt_pk_u16_u32, def, Operand(def.physReg(), v2b), op);
+         } else if (def.physReg().byte()) {
+            unsigned bits = def.physReg().byte() * 8;
+            assert(op.physReg().byte() == 0);
+            def = Definition(def.physReg().advance(-def.physReg().byte()), v1);
+            bld.vop2(aco_opcode::v_and_b32, def, Operand((1 << bits) - 1u), Operand(def.physReg(), op.regClass()));
+            bld.vop2(aco_opcode::v_lshlrev_b32, Definition(op.physReg(), def.regClass()), Operand(bits), op);
+            bld.vop2(aco_opcode::v_or_b32, def, Operand(def.physReg(), op.regClass()), op);
+            bld.vop2(aco_opcode::v_lshrrev_b32, Definition(op.physReg(), def.regClass()), Operand(bits), op);
+         } else {
+            bld.vop1(aco_opcode::v_mov_b32, def, op);
+         }
       } else {
          bld.copy(def, op);
       }
@@ -1092,7 +1115,8 @@ void do_swap(lower_context *ctx, Builder& bld, const copy_operation& copy, bool
       Definition op_as_def = Definition(op.physReg(), op.regClass());
       if (ctx->program->chip_class >= GFX9 && def.regClass() == v1) {
          bld.vop1(aco_opcode::v_swap_b32, def, op_as_def, op, def_as_op);
-      } else if (def.regClass() == v1) {
+      } else if (def.regClass() == v1 || (def.regClass().is_subdword() && ctx->program->chip_class < GFX8)) {
+         assert(def.physReg().byte() == 0 && op.physReg().byte() == 0);
          bld.vop2(aco_opcode::v_xor_b32, op_as_def, op, def_as_op);
          bld.vop2(aco_opcode::v_xor_b32, def, op, def_as_op);
          bld.vop2(aco_opcode::v_xor_b32, op_as_def, op, def_as_op);



More information about the mesa-commit mailing list