[Beignet] [ocl2.0 3/4] GBE: Add 64bit data stateless messages

Ruiling Song ruiling.song at intel.com
Thu Oct 29 00:19:17 PDT 2015


Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
 backend/src/backend/gen8_context.cpp               | 34 +++++++++++
 backend/src/backend/gen8_context.hpp               |  2 +
 backend/src/backend/gen_context.cpp                |  6 ++
 backend/src/backend/gen_context.hpp                |  2 +
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |  2 +
 backend/src/backend/gen_insn_selection.cpp         | 71 ++++++++++++++++++++++
 backend/src/backend/gen_insn_selection.hxx         |  2 +
 7 files changed, 119 insertions(+)

diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 898907a..3ede9c8 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -995,6 +995,40 @@ namespace gbe
       afterMessage(insn, bti, tmp, btiTmp, jip0);
     }
   }
+  void Gen8Context::emitRead64A64Instruction(const SelectionInstruction &insn) {  // 64-bit read lowered to a 2-DW untyped A64 read followed by packLongVec
+    const uint32_t elemNum = insn.extra.elem;
+    GBE_ASSERT(elemNum == 1);  // only the single-element case is handled here
+
+    const GenRegister dst = ra->genReg(insn.dst(0));  // dst(0) is the first temporary (see READ64A64), not the final result
+    const GenRegister src = ra->genReg(insn.src(0));  // src(0) is the address operand
+
+    /* BDW's 64-bit load/store send instructions require the bti to be surfaceless,
+       which we cannot accept, so we fall back to a 2-DW untyped read here. */
+    p->UNTYPED_READA64(dst, src, 2*elemNum);
+
+    for (uint32_t elemID = 0; elemID < elemNum; elemID++) {
+      GenRegister long_tmp = ra->genReg(insn.dst(elemID));            // temporary that received the data just read
+      GenRegister the_long = ra->genReg(insn.dst(elemID + elemNum));  // final destination of the 64-bit value
+      this->packLongVec(long_tmp, the_long, p->curr.execWidth);       // NOTE(review): presumably repacks the DW pairs into longs - confirm
+    }
+  }
+
+  void Gen8Context::emitWrite64A64Instruction(const SelectionInstruction &insn)
+  {  // 64-bit write lowered to unpackLongVec followed by a 2-DW untyped A64 write
+    const uint32_t elemNum = insn.extra.elem;
+    GBE_ASSERT(elemNum == 1);  // only the single-element case is handled here
+    const GenRegister addr = ra->genReg(insn.src(elemNum));  // WRITE64A64 stores the address right after the elemNum values
+
+    /* BDW's 64-bit load/store send instructions require the bti to be surfaceless,
+       which we cannot accept, so we fall back to a 2-DW untyped write here. */
+    for (uint32_t elemID = 0; elemID < elemNum; elemID++) {
+      GenRegister the_long = ra->genReg(insn.src(elemID));                // 64-bit value to store
+      GenRegister long_tmp = ra->genReg(insn.src(elemNum + 1 + elemID));  // temporary placed after the address in the source list
+      this->unpackLongVec(the_long, long_tmp, p->curr.execWidth);         // NOTE(review): presumably splits each long into DWs in long_tmp - confirm
+    }
+
+    p->UNTYPED_WRITEA64(addr, elemNum*2);  // only addr is passed; WRITE64A64 groups addr and the temporaries in one source vector
+  }
   void Gen8Context::emitPackLongInstruction(const SelectionInstruction &insn) {
     const GenRegister src = ra->genReg(insn.src(0));
     const GenRegister dst = ra->genReg(insn.dst(0));
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index c076080..c1ede13 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -72,6 +72,8 @@ namespace gbe
     virtual void emitByteScatterA64Instruction(const SelectionInstruction &insn);
     virtual void emitWrite64Instruction(const SelectionInstruction &insn);
     virtual void emitRead64Instruction(const SelectionInstruction &insn);
+    virtual void emitWrite64A64Instruction(const SelectionInstruction &insn);
+    virtual void emitRead64A64Instruction(const SelectionInstruction &insn);
     virtual void emitI64MULInstruction(const SelectionInstruction &insn);
     virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
 
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 73f5c12..42bd198 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2223,6 +2223,12 @@ namespace gbe
   void GenContext::emitByteScatterA64Instruction(const SelectionInstruction &insn) {
     assert(0);
   }
+  void GenContext::emitRead64A64Instruction(const SelectionInstruction &insn) {
+    assert(0);  // no base implementation; Gen8Context provides the real one
+  }
+  void GenContext::emitWrite64A64Instruction(const SelectionInstruction &insn) {
+    assert(0);  // no base implementation; Gen8Context provides the real one
+  }
 
   void GenContext::emitUnpackByteInstruction(const SelectionInstruction &insn) {
     const GenRegister src = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index b2f4a85..1e0d959 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -156,6 +156,8 @@ namespace gbe
     void emitMathInstruction(const SelectionInstruction &insn);
     virtual void emitRead64Instruction(const SelectionInstruction &insn);
     virtual void emitWrite64Instruction(const SelectionInstruction &insn);
+    virtual void emitRead64A64Instruction(const SelectionInstruction &insn);
+    virtual void emitWrite64A64Instruction(const SelectionInstruction &insn);
     void emitUntypedReadInstruction(const SelectionInstruction &insn);
     void emitUntypedWriteInstruction(const SelectionInstruction &insn);
     virtual void emitUntypedReadA64Instruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 87dce49..63e2fd3 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -26,6 +26,8 @@ DECL_GEN7_SCHEDULE(Barrier,         80,        1,        1)
 DECL_GEN7_SCHEDULE(Fence,           80,        1,        1)
 DECL_GEN7_SCHEDULE(Read64,          80,        1,        1)
 DECL_GEN7_SCHEDULE(Write64,         80,        1,        1)
+DECL_GEN7_SCHEDULE(Read64A64,       80,        1,        1)
+DECL_GEN7_SCHEDULE(Write64A64,      80,        1,        1)
 DECL_GEN7_SCHEDULE(UntypedRead,     160,       1,        1)
 DECL_GEN7_SCHEDULE(UntypedWrite,    160,       1,        1)
 DECL_GEN7_SCHEDULE(UntypedReadA64,  160,       1,        1)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index ef5174e..b160db9 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -185,6 +185,7 @@ namespace gbe
     return this->opcode == SEL_OP_UNTYPED_READ    ||
            this->opcode == SEL_OP_UNTYPED_READA64 ||
            this->opcode == SEL_OP_READ64          ||
+           this->opcode == SEL_OP_READ64A64       ||
            this->opcode == SEL_OP_ATOMIC          ||
            this->opcode == SEL_OP_BYTE_GATHER     ||
            this->opcode == SEL_OP_BYTE_GATHERA64  ||
@@ -210,6 +211,7 @@ namespace gbe
     return this->opcode == SEL_OP_UNTYPED_WRITE    ||
            this->opcode == SEL_OP_UNTYPED_WRITEA64 ||
            this->opcode == SEL_OP_WRITE64          ||
+           this->opcode == SEL_OP_WRITE64A64       ||
            this->opcode == SEL_OP_ATOMIC           ||
            this->opcode == SEL_OP_BYTE_SCATTER     ||
            this->opcode == SEL_OP_BYTE_SCATTERA64  ||
@@ -629,6 +631,10 @@ namespace gbe
     void READ64(Reg addr, const GenRegister *dst, const GenRegister *tmp, uint32_t elemNum, const GenRegister bti, bool native_long, vector<GenRegister> temps);
     /*! Write 64 bits float/int array */
     void WRITE64(Reg addr, const GenRegister *src, const GenRegister *tmp, uint32_t srcNum, GenRegister bti, bool native_long, vector<GenRegister> temps);
+    /*! Read 64-bit data with an A64 (stateless) message */
+    void READ64A64(Reg addr, const GenRegister *dst, const GenRegister *tmp, uint32_t elemNum);
+    /*! Write 64-bit data with an A64 (stateless) message */
+    void WRITE64A64(Reg addr, const GenRegister *src, const GenRegister *tmp, uint32_t srcNum);
     /*! Untyped read (up to 4 elements) */
     void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, GenRegister bti, vector<GenRegister> temps);
     /*! Untyped write (up to 4 elements) */
@@ -1363,6 +1369,39 @@ namespace gbe
     srcVector->reg = &insn->src(0);
   }
 
+  void Selection::Opaque::READ64A64(Reg addr,
+                                 const GenRegister *dst,
+                                 const GenRegister *tmp,
+                                 uint32_t elemNum)
+  {  // Appends one SEL_OP_READ64A64 instruction: dsts = {tmp..., dst...}, src = {addr}
+    SelectionInstruction *insn = NULL;
+    SelectionVector *srcVector = NULL;
+    SelectionVector *dstVector = NULL;
+    insn = this->appendInsn(SEL_OP_READ64A64,elemNum*2, 1);  // 2*elemNum destinations, 1 source
+    srcVector = this->appendVector();
+    dstVector = this->appendVector();
+
+    for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+      insn->dst(elemID) = tmp[elemID];  // temporaries occupy dst slots [0, elemNum)
+
+    for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+      insn->dst(elemID + elemNum) = dst[elemID];  // final results occupy dst slots [elemNum, 2*elemNum)
+
+    insn->src(0) = addr;
+
+    insn->extra.elem = elemNum;  // element count, read back by the emit function
+
+    dstVector->regNum = elemNum;  // the destination vector covers only the temporaries
+    dstVector->isSrc = 0;
+    dstVector->offsetID = 0;
+    dstVector->reg = &insn->dst(0);
+
+    srcVector->regNum = 1;  // the source vector holds just the address
+    srcVector->offsetID = 0;
+    srcVector->isSrc = 1;
+    srcVector->reg = &insn->src(0);
+  }
+
   void Selection::Opaque::UNTYPED_READ(Reg addr,
                                        const GenRegister *dst,
                                        uint32_t elemNum,
@@ -1504,6 +1543,38 @@ namespace gbe
     }
   }
 
+  void Selection::Opaque::WRITE64A64(Reg addr,
+                                  const GenRegister *src,
+                                  const GenRegister *tmp,
+                                  uint32_t srcNum)
+  {  // Appends one SEL_OP_WRITE64A64 instruction: srcs = {src..., addr, tmp...}, dsts = {tmp...}
+    SelectionVector *vector = NULL;
+    SelectionInstruction *insn = NULL;
+
+    const uint32_t dstNum = srcNum;
+    insn = this->appendInsn(SEL_OP_WRITE64A64, dstNum, srcNum*2 + 1);  // srcNum values + 1 address + srcNum temporaries
+    vector = this->appendVector();
+
+    for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+      insn->src(elemID) = src[elemID];  // values to store occupy src slots [0, srcNum)
+
+    insn->src(srcNum) = addr;  // the address follows the values
+    for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+      insn->src(srcNum + 1 + elemID) = tmp[elemID];  // temporaries follow the address
+
+    /* We also need to add the tmp registers to dst, in order
+       to avoid a post-schedule error. */
+    for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+      insn->dst(elemID) = tmp[elemID];
+
+    insn->extra.elem = srcNum;  // element count, read back by the emit function
+
+    vector->regNum = srcNum + 1;  // the vector spans the address plus all temporaries
+    vector->offsetID = srcNum;
+    vector->reg = &insn->src(srcNum);
+    vector->isSrc = 1;
+  }
+
   void Selection::Opaque::UNTYPED_WRITE(Reg addr,
                                         const GenRegister *src,
                                         uint32_t elemNum,
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 0ba9fd6..426362a 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -59,6 +59,8 @@ DECL_SELECTION_IR(UNTYPED_READA64, UntypedReadA64Instruction)
 DECL_SELECTION_IR(UNTYPED_WRITEA64, UntypedWriteA64Instruction)
 DECL_SELECTION_IR(READ64, Read64Instruction)
 DECL_SELECTION_IR(WRITE64, Write64Instruction)
+DECL_SELECTION_IR(READ64A64, Read64A64Instruction)
+DECL_SELECTION_IR(WRITE64A64, Write64A64Instruction)
 DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction)
 DECL_SELECTION_IR(BYTE_SCATTER, ByteScatterInstruction)
 DECL_SELECTION_IR(BYTE_GATHERA64, ByteGatherA64Instruction)
-- 
2.3.1



More information about the Beignet mailing list