[Beignet] [PATCH v2 2/2] [OCL20] gbe: add AtomicA64 instructions with stateless access.

xionghu.luo at intel.com xionghu.luo at intel.com
Tue Dec 29 02:26:59 PST 2015


From: Luo Xionghu <xionghu.luo at intel.com>

Add SEL_OP_ATOMICA64 for gen8 instruction selection and the corresponding
ATOMICA64 to the gen8 encoder, handling both simd8 and simd16 usage.
Local-memory atomics still use bti 254.

v2: remove useless code in stateless A64 atomic; add missing static
address mode processing; remove the flag setting, since only dynamic
address mode needs it.
Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
 backend/src/backend/gen/gen_mesa_disasm.c          |   2 +-
 backend/src/backend/gen8_context.cpp               |  11 ++
 backend/src/backend/gen8_context.hpp               |   1 +
 backend/src/backend/gen8_encoder.cpp               |  40 ++++++-
 backend/src/backend/gen8_encoder.hpp               |   2 +
 backend/src/backend/gen8_instruction.hpp           |   7 +-
 backend/src/backend/gen_context.cpp                |   3 +
 backend/src/backend/gen_context.hpp                |   1 +
 backend/src/backend/gen_defs.hpp                   |   1 +
 backend/src/backend/gen_encoder.cpp                |   8 ++
 backend/src/backend/gen_encoder.hpp                |   3 +
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |   1 +
 backend/src/backend/gen_insn_selection.cpp         | 133 ++++++++++++++++++++-
 backend/src/backend/gen_insn_selection.hxx         |   1 +
 14 files changed, 202 insertions(+), 12 deletions(-)

diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index 52dfcd6..82a7524 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -557,7 +557,7 @@ static int gen_version;
 #define UNTYPED_RW_SIMD_MODE(inst) GEN_BITS_FIELD(inst, bits3.gen7_untyped_rw.simd_mode)
 #define UNTYPED_RW_CATEGORY(inst)  GEN_BITS_FIELD(inst, bits3.gen7_untyped_rw.category)
 #define UNTYPED_RW_MSG_TYPE(inst)  GEN_BITS_FIELD(inst, bits3.gen7_untyped_rw.msg_type)
-#define UNTYPED_RW_AOP_TYPE(inst)  GEN_BITS_FIELD(inst, bits3.gen7_atomic_op.aop_type)
+#define UNTYPED_RW_AOP_TYPE(inst)  GEN_BITS_FIELD2(inst, bits3.gen7_atomic_op.aop_type, bits3.gen8_atomic_a64.aop_type)
 #define SCRATCH_RW_OFFSET(inst)    GEN_BITS_FIELD(inst, bits3.gen7_scratch_rw.offset)
 #define SCRATCH_RW_BLOCK_SIZE(inst) GEN_BITS_FIELD(inst, bits3.gen7_scratch_rw.block_size)
 #define SCRATCH_RW_INVALIDATE_AFTER_READ(inst) GEN_BITS_FIELD(inst, bits3.gen7_scratch_rw.invalidate_after_read)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index f666a20..d19b2c0 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -1029,6 +1029,17 @@ namespace gbe
 
     p->UNTYPED_WRITEA64(addr, elemNum*2);
   }
+  void Gen8Context::emitAtomicA64Instruction(const SelectionInstruction &insn)
+  {
+    const GenRegister src = ra->genReg(insn.src(0));
+    const GenRegister dst = ra->genReg(insn.dst(0));
+    const uint32_t function = insn.extra.function;
+    unsigned srcNum = insn.extra.elem;
+    const GenRegister bti = ra->genReg(insn.src(srcNum));
+    GBE_ASSERT(bti.value.ud == 0xff);
+    p->ATOMICA64(dst, function, src, bti, srcNum);
+  }
+
   void Gen8Context::emitPackLongInstruction(const SelectionInstruction &insn) {
     const GenRegister src = ra->genReg(insn.src(0));
     const GenRegister dst = ra->genReg(insn.dst(0));
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index dbee885..dbe5280 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -74,6 +74,7 @@ namespace gbe
     virtual void emitRead64Instruction(const SelectionInstruction &insn);
     virtual void emitWrite64A64Instruction(const SelectionInstruction &insn);
     virtual void emitRead64A64Instruction(const SelectionInstruction &insn);
+    virtual void emitAtomicA64Instruction(const SelectionInstruction &insn);
     virtual void emitI64MULInstruction(const SelectionInstruction &insn);
     virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
 
diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp
index ee5e6ee..c2bec8b 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -123,7 +123,7 @@ namespace gbe
     MOV(GenRegister::retype(dest, GEN_TYPE_HF), GenRegister::retype(src0, GEN_TYPE_F));
   }
   unsigned Gen8Encoder::setAtomicMessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum) {
-    Gen8NativeInstruction *gen8_insn = &insn->gen8_insn;
+    Gen7NativeInstruction *gen8_insn = &insn->gen7_insn;
     uint32_t msg_length = 0;
     uint32_t response_length = 0;
 
@@ -168,6 +168,44 @@ namespace gbe
       this->setSrc1(insn, bti);
     }
   }
+
+  unsigned Gen8Encoder::setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum) {
+    Gen8NativeInstruction *gen8_insn = &insn->gen8_insn;
+    uint32_t msg_length = 0;
+    uint32_t response_length = 0;
+
+    if (this->curr.execWidth == 8) {
+      msg_length = srcNum + 1;
+      response_length = 1;
+    } else if (this->curr.execWidth == 16) {
+      msg_length = 2 * (srcNum + 1);
+      response_length = 2;
+    } else
+      NOT_IMPLEMENTED;
+
+    const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA;
+    setMessageDescriptor(insn, sfid, msg_length, response_length);
+    gen8_insn->bits3.gen8_atomic_a64.msg_type = GEN8_P1_UNTYPED_ATOMIC_A64;
+    gen8_insn->bits3.gen8_atomic_a64.bti = bti;
+    gen8_insn->bits3.gen8_atomic_a64.return_data = 1;
+    gen8_insn->bits3.gen8_atomic_a64.aop_type = function;
+    gen8_insn->bits3.gen8_atomic_a64.data_size = 0;
+
+    return gen8_insn->bits3.ud;
+  }
+
+  void Gen8Encoder::ATOMICA64(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum) {
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+
+    this->setHeader(insn);
+    insn->header.destreg_or_condmod = GEN_SFID_DATAPORT_DATA;
+
+    this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
+    this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
+    this->setSrc1(insn, GenRegister::immud(0));
+    setAtomicA64MessageDesc(insn, function, bti.value.ud, srcNum);
+  }
+
   unsigned Gen8Encoder::setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum) {
     uint32_t msg_length = 0;
     uint32_t response_length = 0;
diff --git a/backend/src/backend/gen8_encoder.hpp b/backend/src/backend/gen8_encoder.hpp
index 8b74278..3e23df6 100644
--- a/backend/src/backend/gen8_encoder.hpp
+++ b/backend/src/backend/gen8_encoder.hpp
@@ -46,6 +46,7 @@ namespace gbe
     virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value);
     virtual void LOAD_INT64_IMM(GenRegister dest, GenRegister value);
     virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum);
+    virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum);
     virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum);
     virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t elemNum);
     virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t elemNum);
@@ -68,6 +69,7 @@ namespace gbe
                             GenRegister src1 = GenRegister::null());
     virtual void handleDouble(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1 = GenRegister::null());
     virtual unsigned setAtomicMessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum);
+    virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum);
     virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
     virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
     void setSrc0WithAcc(GenNativeInstruction *insn, GenRegister reg, uint32_t accN);
diff --git a/backend/src/backend/gen8_instruction.hpp b/backend/src/backend/gen8_instruction.hpp
index 1b5dafc..452517c 100644
--- a/backend/src/backend/gen8_instruction.hpp
+++ b/backend/src/backend/gen8_instruction.hpp
@@ -555,16 +555,15 @@ union Gen8NativeInstruction
       struct {
         uint32_t bti:8;
         uint32_t aop_type:4;
-        uint32_t simd_mode:1;
+        uint32_t data_size:1;
         uint32_t return_data:1;
-        uint32_t msg_type:4;
-        uint32_t category:1;
+        uint32_t msg_type:5;
         uint32_t header_present:1;
         uint32_t response_length:5;
         uint32_t msg_length:4;
         uint32_t pad3:2;
         uint32_t end_of_thread:1;
-      } gen7_atomic_op;
+      } gen8_atomic_a64;
 
       // gen8 untyped read/write
       struct {
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index cef4e4c..05359af 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2250,6 +2250,9 @@ namespace gbe
   void GenContext::emitWrite64A64Instruction(const SelectionInstruction &insn) {
     assert(0);
   }
+  void GenContext::emitAtomicA64Instruction(const SelectionInstruction &insn) {
+    assert(0);
+  }
 
   void GenContext::emitUnpackByteInstruction(const SelectionInstruction &insn) {
     const GenRegister src = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 30e1ab0..f050548 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -158,6 +158,7 @@ namespace gbe
     virtual void emitWrite64Instruction(const SelectionInstruction &insn);
     virtual void emitRead64A64Instruction(const SelectionInstruction &insn);
     virtual void emitWrite64A64Instruction(const SelectionInstruction &insn);
+    virtual void emitAtomicA64Instruction(const SelectionInstruction &insn);
     void emitUntypedReadInstruction(const SelectionInstruction &insn);
     void emitUntypedWriteInstruction(const SelectionInstruction &insn);
     virtual void emitUntypedReadA64Instruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index fb43718..586c9a1 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -359,6 +359,7 @@ enum GenMessageTarget {
 
 #define GEN8_P1_BYTE_GATHER_A64       16 //10000
 #define GEN8_P1_UNTYPED_READ_A64      17 //10001
+#define GEN8_P1_UNTYPED_ATOMIC_A64    18 //10010
 #define GEN8_P1_UNTYPED_WRITE_A64     25 //11001
 #define GEN8_P1_BYTE_SCATTER_A64      26 //11010
 
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 7161d49..3f2fdbf 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -405,6 +405,10 @@ namespace gbe
     assert(0);
   }
 
+  void GenEncoder::ATOMICA64(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum) {
+    assert(0);
+  }
+
   void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti, uint32_t elemNum) {
     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
     assert(elemNum >= 1 || elemNum <= 4);
@@ -590,6 +594,10 @@ namespace gbe
       NOT_SUPPORTED;
     return insn->bits3.ud;
   }
+  unsigned GenEncoder::setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum) {
+    GBE_ASSERT(0);
+    return 0;
+  }
 
   void GenEncoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum) {
     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index f8d81c9..fb478d2 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -168,6 +168,8 @@ namespace gbe
     void WAIT(void);
     /*! Atomic instructions */
     virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum);
+    /*! AtomicA64 instructions */
+    virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum);
     /*! Untyped read (upto 4 channels) */
     virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum);
     /*! Untyped write (upto 4 channels) */
@@ -237,6 +239,7 @@ namespace gbe
                               unsigned msg_length, unsigned response_length,
                               bool header_present = false, bool end_of_thread = false);
     virtual unsigned setAtomicMessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum);
+    virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum);
     virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
     virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
     unsigned setByteGatherMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemSize);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 15eac79..792014f 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -46,6 +46,7 @@ DECL_GEN7_SCHEDULE(TypedWrite,      80,        1,        1)
 DECL_GEN7_SCHEDULE(SpillReg,        20,        1,        1)
 DECL_GEN7_SCHEDULE(UnSpillReg,      160,       1,        1)
 DECL_GEN7_SCHEDULE(Atomic,          80,        1,        1)
+DECL_GEN7_SCHEDULE(AtomicA64,       80,        1,        1)
 DECL_GEN7_SCHEDULE(I64MUL,          20,        40,      20)
 DECL_GEN7_SCHEDULE(I64SATADD,       20,        40,      20)
 DECL_GEN7_SCHEDULE(I64SATSUB,       20,        40,      20)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index d19f985..09de170 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -187,6 +187,7 @@ namespace gbe
            this->opcode == SEL_OP_READ64          ||
            this->opcode == SEL_OP_READ64A64       ||
            this->opcode == SEL_OP_ATOMIC          ||
+           this->opcode == SEL_OP_ATOMICA64       ||
            this->opcode == SEL_OP_BYTE_GATHER     ||
            this->opcode == SEL_OP_BYTE_GATHERA64  ||
            this->opcode == SEL_OP_SAMPLE          ||
@@ -213,6 +214,7 @@ namespace gbe
            this->opcode == SEL_OP_WRITE64          ||
            this->opcode == SEL_OP_WRITE64A64       ||
            this->opcode == SEL_OP_ATOMIC           ||
+           this->opcode == SEL_OP_ATOMICA64        ||
            this->opcode == SEL_OP_BYTE_SCATTER     ||
            this->opcode == SEL_OP_BYTE_SCATTERA64  ||
            this->opcode == SEL_OP_TYPED_WRITE;
@@ -629,6 +631,8 @@ namespace gbe
     void WAIT(void);
     /*! Atomic instruction */
     void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1, Reg src2, GenRegister bti, vector<GenRegister> temps);
+    /*! AtomicA64 instruction */
+    void ATOMICA64(Reg dst, uint32_t function, uint32_t srcNum, vector<GenRegister> src, GenRegister bti, vector<GenRegister> temps);
     /*! Read 64 bits float/int array */
     void READ64(Reg addr, const GenRegister *dst, const GenRegister *tmp, uint32_t elemNum, const GenRegister bti, bool native_long, vector<GenRegister> temps);
     /*! Write 64 bits float/int array */
@@ -1304,6 +1308,33 @@ namespace gbe
     vector->isSrc = 1;
   }
 
+  void Selection::Opaque::ATOMICA64(Reg dst, uint32_t function,
+                                 uint32_t msgPayload, vector<GenRegister> src,
+                                 GenRegister bti,
+                                 vector<GenRegister> temps) {
+    unsigned dstNum = 1 + temps.size();
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_ATOMICA64, dstNum, msgPayload + 1);
+
+    insn->dst(0) = dst;
+    if(temps.size()) {
+      insn->dst(1) = temps[0];
+      insn->dst(2) = temps[1];
+    }
+
+    for (uint32_t elemID = 0; elemID < msgPayload; ++elemID)
+      insn->src(elemID) = src[elemID];
+    insn->src(msgPayload) = bti;
+
+    insn->extra.function = function;
+    insn->extra.elem = msgPayload;
+
+    SelectionVector *vector = this->appendVector();
+    vector->regNum = msgPayload; //bti not included in SelectionVector
+    vector->offsetID = 0;
+    vector->reg = &insn->src(0);
+    vector->isSrc = 1;
+  }
+
   void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
   void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
   void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0, 0); }
@@ -5481,34 +5512,124 @@ namespace gbe
           this->opcodes.push_back(ir::Opcode(op));
     }
 
+    /* Used to transform an address from 64 bits to 32 bits. Note that
+     * dataport messages cannot accept a scalar register, so the result is
+     * converted to a non-uniform register here. */
+    GenRegister convertU64ToU32(Selection::Opaque &sel,
+                                GenRegister addr) const {
+      GenRegister unpacked = GenRegister::retype(sel.unpacked_ud(addr.reg()), GEN_TYPE_UD);
+      GenRegister dst = sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32);
+      sel.MOV(dst, unpacked);
+      return dst;
+    }
+
+    void untypedAtomicA64Stateless(Selection::Opaque &sel,
+                              const ir::AtomicInstruction &insn,
+                              unsigned msgPayload,
+                              GenRegister dst,
+                              GenRegister addr,
+                              GenRegister src1,
+                              GenRegister src2,
+                              GenRegister bti) const {
+      using namespace ir;
+      GenRegister addrQ;
+      const AtomicOps atomicOp = insn.getAtomicOpcode();
+      GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp;
+      unsigned addrBytes = typeSize(addr.type);
+      GBE_ASSERT(msgPayload <= 3);
+
+      unsigned simdWidth = sel.curr.execWidth;
+      AddressMode AM = insn.getAddressMode();
+      if (addrBytes == 4) {
+        addrQ = sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64);
+        sel.MOV(addrQ, addr);
+      } else {
+        addrQ = addr;
+      }
+
+      if (simdWidth == 8) {
+        vector<GenRegister> msgs;
+        msgs.push_back(addr);
+        msgs.push_back(src1);
+        msgs.push_back(src2);
+        sel.ATOMICA64(dst, genAtomicOp, msgPayload, msgs, bti, sel.getBTITemps(AM));
+      } else if (simdWidth == 16) {
+        vector<GenRegister> msgs;
+        for (unsigned k = 0; k < msgPayload; k++) {
+          msgs.push_back(sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32));
+        }
+        sel.push();
+        /* first quarter */
+        sel.curr.execWidth = 8;
+        sel.curr.quarterControl = GEN_COMPRESSION_Q1;
+        sel.MOV(GenRegister::retype(msgs[0], GEN_TYPE_UL), GenRegister::Qn(addrQ, 0));
+        if(msgPayload > 1)
+          sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 0));
+        if(msgPayload > 2)
+          sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 0));
+        sel.ATOMICA64(GenRegister::Qn(dst, 0), genAtomicOp, msgPayload, msgs, bti, sel.getBTITemps(AM));
+
+        /* second quarter */
+        sel.curr.execWidth = 8;
+        sel.curr.quarterControl = GEN_COMPRESSION_Q2;
+        sel.MOV(GenRegister::retype(msgs[0], GEN_TYPE_UL), GenRegister::Qn(addrQ, 1));
+        if(msgPayload > 1)
+          sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 1));
+        if(msgPayload > 2)
+          sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 1));
+        sel.ATOMICA64(GenRegister::Qn(dst, 1), genAtomicOp, msgPayload, msgs, bti, sel.getBTITemps(AM));
+        sel.pop();
+      }
+    }
+
     INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
       using namespace ir;
       const ir::AtomicInstruction &insn = cast<ir::AtomicInstruction>(dag.insn);
 
-      ir::BTI b;
       const AtomicOps atomicOp = insn.getAtomicOpcode();
       unsigned srcNum = insn.getSrcNum();
       unsigned msgPayload;
+      Register reg = insn.getAddressRegister();
+      GenRegister address = sel.selReg(reg, getType(sel.getRegisterFamily(reg)));
+      AddressSpace addrSpace = insn.getAddressSpace();
+      GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
+                 insn.getAddressSpace() == MEM_PRIVATE ||
+                 insn.getAddressSpace() == MEM_LOCAL ||
+                 insn.getAddressSpace() == MEM_GENERIC ||
+                 insn.getAddressSpace() == MEM_MIXED);
+      unsigned addrBytes = typeSize(address.type);
 
       AddressMode AM = insn.getAddressMode();
       if (AM == AM_DynamicBti) {
-        b.reg = insn.getBtiReg();
         msgPayload = srcNum - 1;
       } else {
-        b.imm = insn.getSurfaceIndex();
-        b.isConst = 1;
         msgPayload = srcNum;
       }
 
       GenRegister dst  = sel.selReg(insn.getDst(0), TYPE_U32);
-      GenRegister bti =  b.isConst ? GenRegister::immud(b.imm) : sel.selReg(b.reg, ir::TYPE_U32);
       GenRegister src0 = sel.selReg(insn.getAddressRegister(), TYPE_U32);
       GenRegister src1 = src0, src2 = src0;
       if(msgPayload > 1) src1 = sel.selReg(insn.getSrc(1), TYPE_U32);
       if(msgPayload > 2) src2 = sel.selReg(insn.getSrc(2), TYPE_U32);
 
       GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp;
-      sel.ATOMIC(dst, genAtomicOp, msgPayload, src0, src1, src2, bti, sel.getBTITemps(AM));
+      if (AM == AM_DynamicBti || AM == AM_StaticBti) {
+        if (AM == AM_DynamicBti) {
+          Register btiReg = insn.getBtiReg();
+          sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2, sel.selReg(btiReg, TYPE_U32), sel.getBTITemps(AM));
+        } else {
+          unsigned SI = insn.getSurfaceIndex();
+          sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2, GenRegister::immud(SI), sel.getBTITemps(AM));
+        }
+      } else if (addrSpace == ir::MEM_LOCAL) {
+        // stateless mode, local still use bti access
+        GenRegister addrDW = address;
+        if (addrBytes == 8)
+          addrDW = convertU64ToU32(sel, address);
+        sel.ATOMIC(dst, genAtomicOp, msgPayload, addrDW, src1, src2, GenRegister::immud(0xfe), sel.getBTITemps(AM));
+      }
+      else
+        untypedAtomicA64Stateless(sel, insn, msgPayload, dst, address, src1, src2, GenRegister::immud(0xff));
 
       markAllChildren(dag);
       return true;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 1fbcb1a..f6ed284 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -40,6 +40,7 @@ DECL_SELECTION_IR(I64MUL, I64MULInstruction)
 DECL_SELECTION_IR(I64DIV, I64DIVREMInstruction)
 DECL_SELECTION_IR(I64REM, I64DIVREMInstruction)
 DECL_SELECTION_IR(ATOMIC, AtomicInstruction)
+DECL_SELECTION_IR(ATOMICA64, AtomicA64Instruction)
 DECL_SELECTION_IR(MACH, BinaryInstruction)
 DECL_SELECTION_IR(CMP, CompareInstruction)
 DECL_SELECTION_IR(I64CMP, I64CompareInstruction)
-- 
2.1.4



More information about the Beignet mailing list