Mesa (master): aco: add VOP3P_instruction

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Apr 22 13:35:45 UTC 2020


Module: Mesa
Branch: master
Commit: 83fdb1ed3dd13228bcb761a4a4532b67a24a682b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=83fdb1ed3dd13228bcb761a4a4532b67a24a682b

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Fri Apr 10 17:28:33 2020 +0100

aco: add VOP3P_instruction

The optimizer isn't yet updated to handle this, since lower_to_hw_instr
will be the only user for now.

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4469>

---

 src/amd/compiler/aco_assembler.cpp | 28 +++++++++++++++++++++++
 src/amd/compiler/aco_ir.h          | 46 ++++++++++++++++++++++++++------------
 src/amd/compiler/aco_opcodes.py    |  8 +++----
 src/amd/compiler/aco_print_ir.cpp  | 22 +++++++++++++++++-
 4 files changed, 85 insertions(+), 19 deletions(-)

diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index aa6402c93b4..ed2e3982976 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -554,6 +554,34 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
             encoding |= vop3->neg[i] << (29+i);
          out.push_back(encoding);
 
+      } else if (instr->format == Format::VOP3P) {
+         VOP3P_instruction* vop3 = static_cast<VOP3P_instruction*>(instr);
+
+         uint32_t encoding;
+         if (ctx.chip_class == GFX9) {
+            encoding = (0b110100111 << 23);
+         } else if (ctx.chip_class == GFX10) {
+            encoding = (0b110011 << 26);
+         } else {
+            unreachable("Unknown chip_class.");
+         }
+
+         encoding |= opcode << 16;
+         encoding |= (vop3->clamp ? 1 : 0) << 15;
+         encoding |= vop3->opsel_lo << 11;
+         encoding |= (vop3->opsel_hi & 0x4) ? 1 : 0 << 14;
+         for (unsigned i = 0; i < 3; i++)
+            encoding |= vop3->neg_hi[i] << (8+i);
+         encoding |= (0xFF & instr->definitions[0].physReg());
+         out.push_back(encoding);
+         encoding = 0;
+         for (unsigned i = 0; i < instr->operands.size(); i++)
+            encoding |= instr->operands[i].physReg() << (i * 9);
+         encoding |= vop3->opsel_hi & 0x3 << 27;
+         for (unsigned i = 0; i < 3; i++)
+            encoding |= vop3->neg_lo[i] << (29+i);
+         out.push_back(encoding);
+
       } else if (instr->isDPP()){
          assert(ctx.chip_class >= GFX8);
          /* first emit the instruction without the DPP operand */
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index fb0c9beb208..2d5387c1849 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -90,17 +90,17 @@ enum class Format : std::uint16_t {
    PSEUDO_REDUCTION = 18,
 
    /* Vector ALU Formats */
+   VOP3P = 19,
    VOP1 = 1 << 8,
    VOP2 = 1 << 9,
    VOPC = 1 << 10,
    VOP3 = 1 << 11,
    VOP3A = 1 << 11,
    VOP3B = 1 << 11,
-   VOP3P = 1 << 12,
    /* Vector Parameter Interpolation Format */
-   VINTRP = 1 << 13,
-   DPP = 1 << 14,
-   SDWA = 1 << 15,
+   VINTRP = 1 << 12,
+   DPP = 1 << 13,
+   SDWA = 1 << 14,
 };
 
 enum barrier_interaction : uint8_t {
@@ -755,7 +755,7 @@ struct Instruction {
           || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
           || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
           || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
-          || ((uint16_t) format & (uint16_t) Format::VOP3P) == (uint16_t) Format::VOP3P;
+          || format == Format::VOP3P;
    }
 
    constexpr bool isSALU() const noexcept
@@ -782,8 +782,7 @@ struct Instruction {
    constexpr bool isVOP3() const noexcept
    {
       return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
-             ((uint16_t) format & (uint16_t) Format::VOP3B) ||
-             format == Format::VOP3P;
+             ((uint16_t) format & (uint16_t) Format::VOP3B);
    }
 
    constexpr bool isSDWA() const noexcept
@@ -877,6 +876,16 @@ struct VOP3A_instruction : public Instruction {
 };
 static_assert(sizeof(VOP3A_instruction) == sizeof(Instruction) + 8);
 
+struct VOP3P_instruction : public Instruction {
+   bool neg_lo[3];
+   bool neg_hi[3];
+   uint8_t opsel_lo : 3;
+   uint8_t opsel_hi : 3;
+   bool clamp : 1;
+   uint32_t padding : 9;
+};
+static_assert(sizeof(VOP3P_instruction) == sizeof(Instruction) + 8);
+
 /**
  * Data Parallel Primitives Format:
  * This format can be used for VOP1, VOP2 or VOPC instructions.
@@ -1172,14 +1181,23 @@ constexpr bool Instruction::usesModifiers() const noexcept
 {
    if (isDPP() || isSDWA())
       return true;
-   if (!isVOP3())
-      return false;
-   const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
-   for (unsigned i = 0; i < operands.size(); i++) {
-      if (vop3->abs[i] || vop3->neg[i])
-         return true;
+
+   if (format == Format::VOP3P) {
+      const VOP3P_instruction *vop3p = static_cast<const VOP3P_instruction*>(this);
+      for (unsigned i = 0; i < operands.size(); i++) {
+         if (vop3p->neg_lo[i] || vop3p->neg_hi[i])
+            return true;
+      }
+      return vop3p->opsel_lo || vop3p->opsel_hi || vop3p->clamp;
+   } else if (isVOP3()) {
+      const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
+      for (unsigned i = 0; i < operands.size(); i++) {
+         if (vop3->abs[i] || vop3->neg[i])
+            return true;
+      }
+      return vop3->opsel || vop3->clamp || vop3->omod;
    }
-   return vop3->opsel || vop3->clamp || vop3->omod;
+   return false;
 }
 
 constexpr bool is_phi(Instruction* instr)
diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py
index a0bc601192c..3fb755f0c7c 100644
--- a/src/amd/compiler/aco_opcodes.py
+++ b/src/amd/compiler/aco_opcodes.py
@@ -49,15 +49,15 @@ class Format(Enum):
    PSEUDO_BRANCH = 16
    PSEUDO_BARRIER = 17
    PSEUDO_REDUCTION = 18
+   VOP3P = 19
    VOP1 = 1 << 8
    VOP2 = 1 << 9
    VOPC = 1 << 10
    VOP3A = 1 << 11
    VOP3B = 1 << 11
-   VOP3P = 1 << 12
-   VINTRP = 1 << 13
-   DPP = 1 << 14
-   SDWA = 1 << 15
+   VINTRP = 1 << 12
+   DPP = 1 << 13
+   SDWA = 1 << 14
 
    def get_builder_fields(self):
       if self == Format.SOPK:
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index fb771aafa37..b3fcb74cd58 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -480,6 +480,11 @@ static void print_instr_format_specific(struct Instruction *instr, FILE *output)
       print_barrier_reorder(mtbuf->can_reorder, mtbuf->barrier, output);
       break;
    }
+   case Format::VOP3P: {
+      if (static_cast<VOP3P_instruction*>(instr)->clamp)
+         fprintf(output, " clamp");
+      break;
+   }
    default: {
       break;
    }
@@ -652,7 +657,22 @@ void aco_print_instr(struct Instruction *instr, FILE *output)
          }
          if (abs[i])
             fprintf(output, "|");
-       }
+
+         if (instr->format == Format::VOP3P) {
+            VOP3P_instruction* vop3 = static_cast<VOP3P_instruction*>(instr);
+            if ((vop3->opsel_lo & (1 << i)) || !(vop3->opsel_hi & (1 << i))) {
+               fprintf(output, ".%c%c",
+                       vop3->opsel_lo & (1 << i) ? 'y' : 'x',
+                       vop3->opsel_hi & (1 << i) ? 'y' : 'x');
+            }
+            if (vop3->neg_lo[i] && vop3->neg_hi[i])
+               fprintf(output, "*[-1,-1]");
+            else if (vop3->neg_lo[i])
+               fprintf(output, "*[-1,1]");
+            else if (vop3->neg_hi[i])
+               fprintf(output, "*[1,-1]");
+         }
+      }
    }
    print_instr_format_specific(instr, output);
 }



More information about the mesa-commit mailing list