Mesa (main): aco: remove explicit dst_preserve flag

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Sep 2 19:00:40 UTC 2021


Module: Mesa
Branch: main
Commit: 0988f7b9bacb7a9baf2a70771827eaaa50e61102
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=0988f7b9bacb7a9baf2a70771827eaaa50e61102

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Wed Sep  1 14:19:33 2021 +0200

aco: remove explicit dst_preserve flag

Instead, we can rely on the fact that subdword definitions
must preserve the unused bits while dword definitions either
pad or sign-extend.

Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12640>

---

 src/amd/compiler/aco_assembler.cpp             | 2 +-
 src/amd/compiler/aco_instruction_selection.cpp | 1 -
 src/amd/compiler/aco_ir.cpp                    | 1 -
 src/amd/compiler/aco_ir.h                      | 3 +--
 src/amd/compiler/aco_lower_to_hw_instr.cpp     | 1 -
 src/amd/compiler/aco_opcodes.py                | 1 -
 src/amd/compiler/aco_opt_value_numbering.cpp   | 4 ++--
 src/amd/compiler/aco_optimizer.cpp             | 1 -
 src/amd/compiler/aco_print_ir.cpp              | 2 +-
 src/amd/compiler/aco_validate.cpp              | 7 +++++--
 src/amd/compiler/tests/test_sdwa.cpp           | 4 +---
 11 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index 3ae5860d857..e520da50fec 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -704,7 +704,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
          } else {
             encoding |= sdwa.dst_sel.to_sdwa_sel(instr->definitions[0].physReg().byte()) << 8;
             uint32_t dst_u = sdwa.dst_sel.sign_extend() ? 1 : 0;
-            if (sdwa.dst_preserve)
+            if (instr->definitions[0].bytes() < 4) /* dst_preserve */
                dst_u = 2;
             encoding |= dst_u << 11;
             encoding |= (sdwa.clamp ? 1 : 0) << 13;
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index f8d16f0337d..32b54c8d14f 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -646,7 +646,6 @@ convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsign
       sdwa->definitions[0] = Definition(tmp);
       sdwa->sel[0] = SubdwordSel(src_bits / 8, 0, sign_extend);
       sdwa->dst_sel = tmp.bytes() == 2 ? SubdwordSel::uword : SubdwordSel::dword;
-      sdwa->dst_preserve = tmp.bytes() == 2;
       bld.insert(std::move(sdwa));
    } else {
       assert(src_bits < 32);
diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index 5f51181da3d..6a8f8d2b808 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -280,7 +280,6 @@ convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr)
    }
 
    sdwa.dst_sel = SubdwordSel(instr->definitions[0].bytes(), 0, false);
-   sdwa.dst_preserve = sdwa.dst_sel.size() < 4;
 
    if (instr->definitions[0].getTemp().type() == RegType::sgpr && chip == GFX8)
       instr->definitions[0].setFixed(vcc);
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 624b8ff23a1..525f6c21a85 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1469,10 +1469,9 @@ struct SDWA_instruction : public Instruction {
    SubdwordSel dst_sel;
    bool neg[2];
    bool abs[2];
-   bool dst_preserve : 1;
    bool clamp : 1;
    uint8_t omod : 2; /* GFX9+ */
-   uint8_t padding : 4;
+   uint8_t padding : 5;
 };
 static_assert(sizeof(SDWA_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
 
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index e4af6fd153b..8de0c5c2fb0 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -2097,7 +2097,6 @@ lower_to_hw_instr(Program* program)
                   sdwa->definitions[0] = dst;
                   sdwa->sel[0] = SubdwordSel(1, op.physReg().byte() + offset / 8, signext);
                   sdwa->dst_sel = SubdwordSel::uword;
-                  sdwa->dst_preserve = true;
                   bld.insert(std::move(sdwa));
                }
                break;
diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py
index 33ba9346682..426c97c2069 100644
--- a/src/amd/compiler/aco_opcodes.py
+++ b/src/amd/compiler/aco_opcodes.py
@@ -180,7 +180,6 @@ class Format(Enum):
          for i in range(min(num_operands, 2)):
             res += 'instr->sel[{0}] = SubdwordSel(op{0}.op.bytes(), 0, false);'.format(i)
          res += 'instr->dst_sel = SubdwordSel(def0.bytes(), 0, false);\n'
-         res += 'if (def0.bytes() < 4) instr->dst_preserve = true;'
       return res
 
 
diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp
index 32c0bd8a120..d650665e0d0 100644
--- a/src/amd/compiler/aco_opt_value_numbering.cpp
+++ b/src/amd/compiler/aco_opt_value_numbering.cpp
@@ -187,8 +187,8 @@ struct InstrPred {
          return aSDWA.sel[0] == bSDWA.sel[0] && aSDWA.sel[1] == bSDWA.sel[1] &&
                 aSDWA.dst_sel == bSDWA.dst_sel && aSDWA.abs[0] == bSDWA.abs[0] &&
                 aSDWA.abs[1] == bSDWA.abs[1] && aSDWA.neg[0] == bSDWA.neg[0] &&
-                aSDWA.neg[1] == bSDWA.neg[1] && aSDWA.dst_preserve == bSDWA.dst_preserve &&
-                aSDWA.clamp == bSDWA.clamp && aSDWA.omod == bSDWA.omod;
+                aSDWA.neg[1] == bSDWA.neg[1] && aSDWA.clamp == bSDWA.clamp &&
+                aSDWA.omod == bSDWA.omod;
       }
 
       switch (a->format) {
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index f461932bfeb..933d56894bf 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -2079,7 +2079,6 @@ combine_inverse_comparison(opt_ctx& ctx, aco_ptr<Instruction>& instr)
       memcpy(new_sdwa->sel, cmp_sdwa.sel, sizeof(new_sdwa->sel));
       memcpy(new_sdwa->neg, cmp_sdwa.neg, sizeof(new_sdwa->neg));
       new_sdwa->dst_sel = cmp_sdwa.dst_sel;
-      new_sdwa->dst_preserve = cmp_sdwa.dst_preserve;
       new_sdwa->clamp = cmp_sdwa.clamp;
       new_sdwa->omod = cmp_sdwa.omod;
       new_instr = new_sdwa;
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index 15e504feb2a..618f8b6b03b 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -638,7 +638,7 @@ print_instr_format_specific(const Instruction* instr, FILE* output)
          default: break;
          }
       }
-      if (sdwa.dst_preserve)
+      if (instr->definitions[0].bytes() < 4)
          fprintf(output, " dst_preserve");
    }
 }
diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp
index 022b2fd6bab..3e1603e6ca1 100644
--- a/src/amd/compiler/aco_validate.cpp
+++ b/src/amd/compiler/aco_validate.cpp
@@ -174,8 +174,11 @@ validate_ir(Program* program)
                   "SDWA definition selection size must be 1, 2 or 4 bytes", instr.get());
                check(sdwa.dst_sel.offset() % sdwa.dst_sel.size() == 0, "Invalid selection offset",
                      instr.get());
-               check(def.bytes() == 4 || sdwa.dst_preserve,
-                     "SDWA subdword definition needs dst_preserve", instr.get());
+               check(def.bytes() == 4 || def.bytes() == sdwa.dst_sel.size(),
+                     "SDWA dst_sel size must be definition size for subdword definitions",
+                     instr.get());
+               check(def.bytes() == 4 || sdwa.dst_sel.offset() == 0,
+                     "SDWA dst_sel offset must be 0 for subdword definitions", instr.get());
             }
 
             for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) {
diff --git a/src/amd/compiler/tests/test_sdwa.cpp b/src/amd/compiler/tests/test_sdwa.cpp
index 2bd002ed56b..de46ee755e1 100644
--- a/src/amd/compiler/tests/test_sdwa.cpp
+++ b/src/amd/compiler/tests/test_sdwa.cpp
@@ -37,9 +37,7 @@ BEGIN_TEST(validate.sdwa.allow)
       SDWA_instruction *sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa();
       sdwa->neg[0] = sdwa->neg[1] = sdwa->abs[0] = sdwa->abs[1] = true;
 
-      sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa();
-      sdwa->dst_preserve = true;
-      sdwa->dst_sel = SubdwordSel::ubyte0;
+      sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1b), inputs[0], inputs[1]).instr->sdwa();
 
       sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa();
       sdwa->sel[0] = SubdwordSel::sbyte2;



More information about the mesa-commit mailing list