Mesa (main): aco: add can_use_DPP() and convert_to_DPP()

Thu Aug 19 18:36:04 UTC 2021

Module: Mesa
Branch: main
Commit: 3db31963794ed713b376a7db917c4f128d78d188
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3db31963794ed713b376a7db917c4f128d78d188

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Wed Jul 14 17:11:44 2021 +0100

aco: add can_use_DPP() and convert_to_DPP()

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11924>

---

 src/amd/compiler/aco_ir.cpp | 74 +++++++++++++++++++++++++++++++++++++++++++++
 src/amd/compiler/aco_ir.h   |  2 ++
 2 files changed, 76 insertions(+)

diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index 79f9d71a793..8900e2f2834 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -24,6 +24,8 @@
 
 #include "aco_ir.h"
 
+#include "aco_builder.h"
+
 #include "util/debug.h"
 
 #include "c11/threads.h"
@@ -302,6 +304,78 @@ convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr)
    return tmp;
 }
 
+bool
+can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra)
+{
+   assert(instr->isVALU() && !instr->operands.empty());
+
+   if (instr->isDPP())
+      return true;
+
+   if (instr->operands.size() && instr->operands[0].isLiteral())
+      return false;
+
+   if (instr->isSDWA())
+      return false;
+
+   if (!pre_ra && (instr->isVOPC() || instr->definitions.size() > 1) &&
+       instr->definitions.back().physReg() != vcc)
+      return false;
+
+   if (!pre_ra && instr->operands.size() >= 3 && instr->operands[2].physReg() != vcc)
+      return false;
+
+   if (instr->isVOP3()) {
+      const VOP3_instruction* vop3 = &instr->vop3();
+      if (vop3->clamp || vop3->omod || vop3->opsel)
+         return false;
+      if (instr->format == Format::VOP3)
+         return false;
+   }
+
+   /* there are more cases but those all take 64-bit inputs */
+   return instr->opcode != aco_opcode::v_madmk_f32 && instr->opcode != aco_opcode::v_madak_f32 &&
+          instr->opcode != aco_opcode::v_madmk_f16 && instr->opcode != aco_opcode::v_madak_f16 &&
+          instr->opcode != aco_opcode::v_readfirstlane_b32 &&
+          instr->opcode != aco_opcode::v_cvt_f64_i32 &&
+          instr->opcode != aco_opcode::v_cvt_f64_f32 && instr->opcode != aco_opcode::v_cvt_f64_u32;
+}
+
+aco_ptr<Instruction>
+convert_to_DPP(aco_ptr<Instruction>& instr)
+{
+   if (instr->isDPP())
+      return NULL;
+
+   aco_ptr<Instruction> tmp = std::move(instr);
+   Format format =
+      (Format)(((uint32_t)tmp->format & ~(uint32_t)Format::VOP3) | (uint32_t)Format::DPP);
+   instr.reset(create_instruction<DPP_instruction>(tmp->opcode, format, tmp->operands.size(),
+                                                   tmp->definitions.size()));
+   std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin());
+   for (unsigned i = 0; i < instr->definitions.size(); i++)
+      instr->definitions[i] = tmp->definitions[i];
+
+   DPP_instruction* dpp = &instr->dpp();
+   dpp->dpp_ctrl = dpp_quad_perm(0, 1, 2, 3);
+   dpp->row_mask = 0xf;
+   dpp->bank_mask = 0xf;
+
+   if (tmp->isVOP3()) {
+      const VOP3_instruction* vop3 = &tmp->vop3();
+      memcpy(dpp->neg, vop3->neg, sizeof(dpp->neg));
+      memcpy(dpp->abs, vop3->abs, sizeof(dpp->abs));
+   }
+
+   if (instr->isVOPC() || instr->definitions.size() > 1)
+      instr->definitions.back().setFixed(vcc);
+
+   if (instr->operands.size() >= 3)
+      instr->operands[2].setFixed(vcc);
+
+   return tmp;
+}
+
 bool
 can_use_opsel(chip_class chip, aco_opcode op, int idx, bool high)
 {
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index c375b3857c1..a739e5deebd 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1730,8 +1730,10 @@ bool is_dead(const std::vector<uint16_t>& uses, Instruction* instr);
 
 bool can_use_opsel(chip_class chip, aco_opcode op, int idx, bool high);
 bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra);
+bool can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra);
 /* updates "instr" and returns the old instruction (or NULL if no update was needed) */
 aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr);
+aco_ptr<Instruction> convert_to_DPP(aco_ptr<Instruction>& instr);
 bool needs_exec_mask(const Instruction* instr);
 
 uint32_t get_reduction_identity(ReduceOp op, unsigned idx);