[Beignet] [PATCH V3] Use a separate pattern for simd shuffle instead of binary pattern

Guo Yejun yejun.guo at intel.com
Fri Jul 17 00:49:44 PDT 2015


The binary pattern assumes that both source operands have the same type,
which is not true for simd shuffle, so add a separate pattern for it.

v2: use a different way to obtain imm data
    do not use GenRegister::udxgrf, use sel.selReg instead
    add SimdShuffleInstruction::wellFormed
v3: refine SimdShuffleInstruction::wellFormed
    set dag.child[0]->isRoot = 1 when src1 is folded into an immediate

Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 backend/src/backend/gen8_context.cpp               | 38 ++++++------
 backend/src/backend/gen8_context.hpp               |  1 +
 backend/src/backend/gen_context.cpp                | 68 ++++++++++++----------
 backend/src/backend/gen_context.hpp                |  1 +
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |  1 +
 backend/src/backend/gen_insn_selection.cpp         | 63 ++++++++++++++++----
 backend/src/backend/gen_insn_selection.hxx         |  2 +-
 backend/src/ir/instruction.cpp                     | 38 +++++++++++-
 backend/src/ir/instruction.hpp                     |  8 +++
 backend/src/ir/instruction.hxx                     |  2 +-
 10 files changed, 158 insertions(+), 64 deletions(-)

diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 66bb54a..b497ee5 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -254,6 +254,27 @@ namespace gbe
     }
   }
 
+  void Gen8Context::emitSimdShuffleInstruction(const SelectionInstruction &insn) {
+    const GenRegister dst = ra->genReg(insn.dst(0));
+    const GenRegister src0 = ra->genReg(insn.src(0));
+    const GenRegister src1 = ra->genReg(insn.src(1));
+    assert(insn.opcode == SEL_OP_SIMD_SHUFFLE);
+
+    uint32_t simd = p->curr.execWidth;
+    if (src1.file == GEN_IMMEDIATE_VALUE) {
+      uint32_t offset = src1.value.ud % simd;
+      GenRegister reg = GenRegister::suboffset(src0, offset);
+      p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type));
+    } else {
+      uint32_t base = src0.nr * 32 + src0.subnr * 4;
+      GenRegister baseReg = GenRegister::immuw(base);
+      const GenRegister a0 = GenRegister::addr8(0);
+      p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+      GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+      p->MOV(dst, indirect);
+    }
+  }
+
   void Gen8Context::emitBinaryInstruction(const SelectionInstruction &insn) {
     const GenRegister dst = ra->genReg(insn.dst(0));
     const GenRegister src0 = ra->genReg(insn.src(0));
@@ -273,23 +294,6 @@ namespace gbe
         p->ADD(dst, dst, src1);
         break;
       }
-      case SEL_OP_SIMD_SHUFFLE:
-      {
-        uint32_t simd = p->curr.execWidth;
-        if (src1.file == GEN_IMMEDIATE_VALUE) {
-          uint32_t offset = src1.value.ud % simd;
-          GenRegister reg = GenRegister::suboffset(src0, offset);
-          p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type));
-        } else {
-          uint32_t base = src0.nr * 32 + src0.subnr * 4;
-          GenRegister baseReg = GenRegister::immuw(base);
-          const GenRegister a0 = GenRegister::addr8(0);
-          p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
-          GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
-          p->MOV(dst, indirect);
-        }
-        break;
-      }
       default:
         GenContext::emitBinaryInstruction(insn);
     }
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 4f164ce..84508e9 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -52,6 +52,7 @@ namespace gbe
 
     virtual void emitUnaryInstruction(const SelectionInstruction &insn);
     virtual void emitUnaryWithTempInstruction(const SelectionInstruction &insn);
+    virtual void emitSimdShuffleInstruction(const SelectionInstruction &insn);
     virtual void emitBinaryInstruction(const SelectionInstruction &insn);
     virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
     virtual void emitI64MULHIInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index db27377..e16b0a9 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -545,6 +545,42 @@ namespace gbe
     }
   }
 
+  void GenContext::emitSimdShuffleInstruction(const SelectionInstruction &insn) {
+    const GenRegister dst = ra->genReg(insn.dst(0));
+    const GenRegister src0 = ra->genReg(insn.src(0));
+    const GenRegister src1 = ra->genReg(insn.src(1));
+    assert(insn.opcode == SEL_OP_SIMD_SHUFFLE);
+
+    uint32_t simd = p->curr.execWidth;
+    if (src1.file == GEN_IMMEDIATE_VALUE) {
+      uint32_t offset = src1.value.ud % simd;
+      GenRegister reg = GenRegister::suboffset(src0, offset);
+      p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type));
+    } else {
+      uint32_t base = src0.nr * 32 + src0.subnr * 4;
+      GenRegister baseReg = GenRegister::immuw(base);
+      const GenRegister a0 = GenRegister::addr8(0);
+
+      p->push();
+        if (simd == 8) {
+          p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+          GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+          p->MOV(dst, indirect);
+        } else if (simd == 16) {
+          p->curr.execWidth = 8;
+          p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+          GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+          p->MOV(dst, indirect);
+
+          p->curr.quarterControl = 1;
+          p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+          p->MOV(GenRegister::offset(dst, 1, 0), indirect);
+        } else
+          NOT_IMPLEMENTED;
+      p->pop();
+    }
+  }
+
   void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
     const GenRegister dst = ra->genReg(insn.dst(0));
     const GenRegister src0 = ra->genReg(insn.src(0));
@@ -595,38 +631,6 @@ namespace gbe
           p->MOV(xdst.bottom_half(), xsrc1.bottom_half());
         }
         break;
-      case SEL_OP_SIMD_SHUFFLE:
-        {
-          uint32_t simd = p->curr.execWidth;
-          if (src1.file == GEN_IMMEDIATE_VALUE) {
-            uint32_t offset = src1.value.ud % simd;
-            GenRegister reg = GenRegister::suboffset(src0, offset);
-            p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type));
-          } else {
-            uint32_t base = src0.nr * 32 + src0.subnr * 4;
-            GenRegister baseReg = GenRegister::immuw(base);
-            const GenRegister a0 = GenRegister::addr8(0);
-
-            p->push();
-              if (simd == 8) {
-                p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
-                GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
-                p->MOV(dst, indirect);
-              } else if (simd == 16) {
-                p->curr.execWidth = 8;
-                p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
-                GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
-                p->MOV(dst, indirect);
-
-                p->curr.quarterControl = 1;
-                p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
-                p->MOV(GenRegister::offset(dst, 1, 0), indirect);
-              } else
-                NOT_IMPLEMENTED;
-            p->pop();
-          }
-        }
-        break;
       default: NOT_IMPLEMENTED;
     }
   }
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index d387387..69fe513 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -127,6 +127,7 @@ namespace gbe
     virtual void emitUnaryInstruction(const SelectionInstruction &insn);
     virtual void emitUnaryWithTempInstruction(const SelectionInstruction &insn);
     virtual void emitBinaryInstruction(const SelectionInstruction &insn);
+    virtual void emitSimdShuffleInstruction(const SelectionInstruction &insn);
     virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
     void emitTernaryInstruction(const SelectionInstruction &insn);
     virtual void emitI64MULHIInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index d054820..d073770 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -3,6 +3,7 @@ DECL_GEN7_SCHEDULE(Label,           0,         0,        0)
 DECL_GEN7_SCHEDULE(Unary,           20,        4,        2)
 DECL_GEN7_SCHEDULE(UnaryWithTemp,   20,        40,      20)
 DECL_GEN7_SCHEDULE(Binary,          20,        4,        2)
+DECL_GEN7_SCHEDULE(SimdShuffle,     20,        4,        2)
 DECL_GEN7_SCHEDULE(BinaryWithTemp,  20,        40,      20)
 DECL_GEN7_SCHEDULE(Ternary,         20,        4,        2)
 DECL_GEN7_SCHEDULE(I64Shift,        20,        40,      20)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index af5ab9c..b0ba9e3 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -543,7 +543,6 @@ namespace gbe
     ALU1(RNDD)
     ALU1(RNDU)
     ALU2(MACH)
-    ALU2(SIMD_SHUFFLE)
     ALU1(LZD)
     ALU3(MAD)
     ALU2WithTemp(MUL_HI)
@@ -565,6 +564,8 @@ namespace gbe
 #undef ALU2WithTemp
 #undef ALU3
 #undef I64Shift
+    /*! simd shuffle */
+    void SIMD_SHUFFLE(Reg dst, Reg src0, Reg src1);
     /*! Convert 64-bit integer to 32-bit float */
     void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[6]);
     /*! Convert 64-bit integer to 32-bit float */
@@ -1652,6 +1653,14 @@ namespace gbe
     insn->src(2) = src2;
   }
 
+  void Selection::Opaque::SIMD_SHUFFLE(Reg dst, Reg src0, Reg src1)
+  {
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_SIMD_SHUFFLE, 1, 2);
+    insn->dst(0) = dst;
+    insn->src(0) = src0;
+    insn->src(1) = src1;
+  }
+
   void Selection::Opaque::I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]) {
     SelectionInstruction *insn = this->appendInsn(SEL_OP_I64CMP, 3, 2);
     insn->src(0) = src0;
@@ -2815,17 +2824,6 @@ namespace gbe
         case OP_UPSAMPLE_LONG:
           sel.UPSAMPLE_LONG(dst, src0, src1);
           break;
-        case OP_SIMD_SHUFFLE:
-          {
-            if (src1.file == GEN_IMMEDIATE_VALUE)
-              sel.SIMD_SHUFFLE(dst, src0, src1);
-            else {
-              GenRegister shiftL = GenRegister::udxgrf(sel.curr.execWidth, sel.reg(FAMILY_DWORD));
-              sel.SHL(shiftL, src1, GenRegister::immud(0x2));
-              sel.SIMD_SHUFFLE(dst, src0, shiftL);
-            }
-          }
-          break;
         default: NOT_IMPLEMENTED;
       }
       sel.pop();
@@ -4973,6 +4971,46 @@ namespace gbe
     }
   };
 
+  class SimdShuffleInstructionPattern : public SelectionPattern
+  {
+  public:
+    SimdShuffleInstructionPattern(void) : SelectionPattern(1,1) {
+      this->opcodes.push_back(ir::OP_SIMD_SHUFFLE);
+    }
+    INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
+      using namespace ir;
+      const ir::SimdShuffleInstruction &insn = cast<SimdShuffleInstruction>(dag.insn);
+      assert(insn.getOpcode() == OP_SIMD_SHUFFLE);
+      const Type type = insn.getType();
+      GenRegister dst  = sel.selReg(insn.getDst(0), type);
+      GenRegister src0  = sel.selReg(insn.getSrc(0), type);
+      GenRegister src1;
+
+      SelectionDAG *dag0 = dag.child[0];
+      SelectionDAG *dag1 = dag.child[1];
+      if (dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag1->insn)) {
+        const auto &childInsn = cast<LoadImmInstruction>(dag1->insn);
+        src1 = getRegisterFromImmediate(childInsn.getImmediate(), TYPE_U32);
+        if (dag0) dag0->isRoot = 1;
+      } else {
+        markAllChildren(dag);
+        src1 = sel.selReg(insn.getSrc(1), TYPE_U32);
+      }
+
+      sel.push();
+      if (src1.file == GEN_IMMEDIATE_VALUE)
+        sel.SIMD_SHUFFLE(dst, src0, src1);
+      else {
+        GenRegister shiftL = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
+        sel.SHL(shiftL, src1, GenRegister::immud(0x2));
+        sel.SIMD_SHUFFLE(dst, src0, shiftL);
+      }
+      sel.pop();
+      return true;
+    }
+
+  };
+
   /*! Get a region of a register */
   class RegionInstructionPattern : public SelectionPattern
   {
@@ -5247,6 +5285,7 @@ namespace gbe
     this->insert<GetImageInfoInstructionPattern>();
     this->insert<ReadARFInstructionPattern>();
     this->insert<RegionInstructionPattern>();
+    this->insert<SimdShuffleInstructionPattern>();
     this->insert<IndirectMovInstructionPattern>();
     this->insert<NullaryInstructionPattern>();
 
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 79f2ce1..adbb137 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -26,7 +26,7 @@ DECL_SELECTION_IR(SHL, BinaryInstruction)
 DECL_SELECTION_IR(RSR, BinaryInstruction)
 DECL_SELECTION_IR(RSL, BinaryInstruction)
 DECL_SELECTION_IR(ASR, BinaryInstruction)
-DECL_SELECTION_IR(SIMD_SHUFFLE, BinaryInstruction)
+DECL_SELECTION_IR(SIMD_SHUFFLE, SimdShuffleInstruction)
 DECL_SELECTION_IR(I64SHR, I64ShiftInstruction)
 DECL_SELECTION_IR(I64SHL, I64ShiftInstruction)
 DECL_SELECTION_IR(I64ASR, I64ShiftInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 12d70a6..f93c528 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -741,6 +741,22 @@ namespace ir {
       Register src[0];
     };
 
+    class ALIGNED_INSTRUCTION SimdShuffleInstruction : public NaryInstruction<2>
+    {
+    public:
+      SimdShuffleInstruction(Type type,
+                        Register dst,
+                        Register src0,
+                        Register src1) {
+        this->opcode = OP_SIMD_SHUFFLE;
+        this->type = type;
+        this->dst[0] = dst;
+        this->src[0] = src0;
+        this->src[1] = src1;
+      }
+      INLINE bool wellFormed(const Function &fn, std::string &why) const;
+    };
+
     class ALIGNED_INSTRUCTION RegionInstruction :
       public BasePolicy,
       public NSrcPolicy<RegionInstruction, 1>,
@@ -1154,6 +1170,19 @@ namespace ir {
       return true;
     }
 
+    INLINE bool SimdShuffleInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+    {
+      if (UNLIKELY( this->type != TYPE_U32 && this->type != TYPE_S32 && this->type != TYPE_FLOAT)) {
+        whyNot = "Only support S32/U32/FLOAT type";
+        return false;
+      }
+
+      if (UNLIKELY(checkRegisterData(FAMILY_DWORD, src[1], fn, whyNot) == false))
+        return false;
+
+      return true;
+    }
+
     INLINE bool RegionInstruction::wellFormed(const Function &fn, std::string &whyNot) const
     {
       if (UNLIKELY(checkRegisterData(FAMILY_DWORD, src[0], fn, whyNot) == false))
@@ -1461,6 +1490,10 @@ START_INTROSPECTION(RegionInstruction)
 #include "ir/instruction.hxx"
 END_INTROSPECTION(RegionInstruction)
 
+START_INTROSPECTION(SimdShuffleInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(SimdShuffleInstruction)
+
 START_INTROSPECTION(IndirectMovInstruction)
 #include "ir/instruction.hxx"
 END_INTROSPECTION(IndirectMovInstruction)
@@ -1652,6 +1685,7 @@ DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
 DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters())
 DECL_MEM_FN(ReadARFInstruction, Type, getType(void), getType())
 DECL_MEM_FN(ReadARFInstruction, ARFRegister, getARFRegister(void), getARFRegister())
+DECL_MEM_FN(SimdShuffleInstruction, Type, getType(void), getType())
 DECL_MEM_FN(RegionInstruction, uint32_t, getOffset(void), getOffset())
 DECL_MEM_FN(IndirectMovInstruction, uint32_t, getOffset(void), getOffset())
 DECL_MEM_FN(IndirectMovInstruction, Type, getType(void), getType())
@@ -1751,7 +1785,6 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
   DECL_EMIT_FUNCTION(RHADD)
   DECL_EMIT_FUNCTION(I64HADD)
   DECL_EMIT_FUNCTION(I64RHADD)
-  DECL_EMIT_FUNCTION(SIMD_SHUFFLE)
 
 #undef DECL_EMIT_FUNCTION
 
@@ -1881,6 +1914,9 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
   Instruction REGION(Register dst, Register src, uint32_t offset) {
     return internal::RegionInstruction(dst, src, offset).convert();
   }
+  Instruction SIMD_SHUFFLE(Type type, Register dst, Register src0, Register src1) {
+    return internal::SimdShuffleInstruction(type, dst, src0, src1).convert();
+  }
 
   Instruction INDIRECT_MOV(Type type, Register dst, Register src0, Register src1, uint32_t offset) {
     return internal::IndirectMovInstruction(type, dst, src0, src1, offset).convert();
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index ec4d00d..cf8d839 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -522,6 +522,14 @@ namespace ir {
     static bool isClassOf(const Instruction &insn);
   };
 
+  /*! simd shuffle */
+  class SimdShuffleInstruction : public Instruction {
+  public:
+    Type getType(void) const;
+    /*! Return true if the given instruction is an instance of this class */
+    static bool isClassOf(const Instruction &insn);
+  };
+
   /*! return a region of a register, make sure the offset does not exceed the register size */
   class RegionInstruction : public Instruction {
   public:
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 1001837..81548c9 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -59,7 +59,7 @@ DECL_INSN(BSB, BinaryInstruction)
 DECL_INSN(OR, BinaryInstruction)
 DECL_INSN(XOR, BinaryInstruction)
 DECL_INSN(AND, BinaryInstruction)
-DECL_INSN(SIMD_SHUFFLE, BinaryInstruction)
+DECL_INSN(SIMD_SHUFFLE, SimdShuffleInstruction)
 DECL_INSN(SEL, SelectInstruction)
 DECL_INSN(EQ, CompareInstruction)
 DECL_INSN(NE, CompareInstruction)
-- 
1.9.1



More information about the Beignet mailing list