[Beignet] [PATCH 2/2] [OCL20] gbe: add AtomicA64 instructions with stateless access.
xionghu.luo at intel.com
xionghu.luo at intel.com
Thu Dec 24 03:00:02 PST 2015
From: Luo Xionghu <xionghu.luo at intel.com>
Add SEL_OP_ATOMICA64 to the gen8 instruction selection and a matching
ATOMICA64 to the gen8 encoder, handling both SIMD8 and SIMD16 usage.
Local-memory atomics still use BTI 254.
Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
backend/src/backend/gen/gen_mesa_disasm.c | 2 +-
backend/src/backend/gen8_context.cpp | 27 +++++
backend/src/backend/gen8_context.hpp | 1 +
backend/src/backend/gen8_encoder.cpp | 44 ++++++-
backend/src/backend/gen8_encoder.hpp | 2 +
backend/src/backend/gen8_instruction.hpp | 7 +-
backend/src/backend/gen_context.cpp | 3 +
backend/src/backend/gen_context.hpp | 1 +
backend/src/backend/gen_defs.hpp | 1 +
backend/src/backend/gen_encoder.cpp | 8 ++
backend/src/backend/gen_encoder.hpp | 3 +
.../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
backend/src/backend/gen_insn_selection.cpp | 126 ++++++++++++++++++++-
backend/src/backend/gen_insn_selection.hxx | 1 +
14 files changed, 220 insertions(+), 7 deletions(-)
diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index 52dfcd6..82a7524 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -557,7 +557,7 @@ static int gen_version;
#define UNTYPED_RW_SIMD_MODE(inst) GEN_BITS_FIELD(inst, bits3.gen7_untyped_rw.simd_mode)
#define UNTYPED_RW_CATEGORY(inst) GEN_BITS_FIELD(inst, bits3.gen7_untyped_rw.category)
#define UNTYPED_RW_MSG_TYPE(inst) GEN_BITS_FIELD(inst, bits3.gen7_untyped_rw.msg_type)
-#define UNTYPED_RW_AOP_TYPE(inst) GEN_BITS_FIELD(inst, bits3.gen7_atomic_op.aop_type)
+#define UNTYPED_RW_AOP_TYPE(inst) GEN_BITS_FIELD2(inst, bits3.gen7_atomic_op.aop_type, bits3.gen8_atomic_a64.aop_type)
#define SCRATCH_RW_OFFSET(inst) GEN_BITS_FIELD(inst, bits3.gen7_scratch_rw.offset)
#define SCRATCH_RW_BLOCK_SIZE(inst) GEN_BITS_FIELD(inst, bits3.gen7_scratch_rw.block_size)
#define SCRATCH_RW_INVALIDATE_AFTER_READ(inst) GEN_BITS_FIELD(inst, bits3.gen7_scratch_rw.invalidate_after_read)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index f666a20..2838256 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -1029,6 +1029,33 @@ namespace gbe
p->UNTYPED_WRITEA64(addr, elemNum*2);
}
+ void Gen8Context::emitAtomicA64Instruction(const SelectionInstruction &insn) // Emit the A64 atomic SEND for one SEL_OP_ATOMICA64 selection instruction.
+ {
+ const GenRegister src = ra->genReg(insn.src(0)); // first payload register
+ const GenRegister dst = ra->genReg(insn.dst(0)); // destination of the atomic's returned data
+ const uint32_t function = insn.extra.function; // atomic opcode chosen at selection time
+ unsigned srcNum = insn.extra.elem; // payload operand count; the BTI operand is stored right after the payload
+
+ const GenRegister bti = ra->genReg(insn.src(srcNum));
+
+ if (bti.file == GEN_IMMEDIATE_VALUE) { // immediate BTI: a single SEND is enough
+ p->ATOMICA64(dst, function, src, bti, srcNum);
+ } else { // BTI lives in a register: wrap the SEND in beforeMessage()/afterMessage()
+ GenRegister flagTemp = ra->genReg(insn.dst(1)); // temporaries attached by the selection stage
+ GenRegister btiTmp = ra->genReg(insn.dst(2));
+
+ unsigned desc = p->generateAtomicMessageDesc(function, 0, srcNum); // NOTE(review): non-A64 descriptor helper is used here - confirm it matches the A64 message layout
+
+ unsigned jip0 = beforeMessage(insn, bti, flagTemp, btiTmp, desc);
+ p->push(); // save encoder state; restored by pop() below
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->curr.useFlag(insn.state.flag, insn.state.subFlag); // predicate on the flag recorded in the selection instruction
+ p->ATOMICA64(dst, function, src, GenRegister::addr1(0), srcNum); // address register stands in for the BTI operand
+ p->pop();
+ afterMessage(insn, bti, flagTemp, btiTmp, jip0);
+ }
+ }
+
void Gen8Context::emitPackLongInstruction(const SelectionInstruction &insn) {
const GenRegister src = ra->genReg(insn.src(0));
const GenRegister dst = ra->genReg(insn.dst(0));
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index dbee885..dbe5280 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -74,6 +74,7 @@ namespace gbe
virtual void emitRead64Instruction(const SelectionInstruction &insn);
virtual void emitWrite64A64Instruction(const SelectionInstruction &insn);
virtual void emitRead64A64Instruction(const SelectionInstruction &insn);
+ virtual void emitAtomicA64Instruction(const SelectionInstruction &insn);
virtual void emitI64MULInstruction(const SelectionInstruction &insn);
virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp
index ee5e6ee..15afa49 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -123,7 +123,7 @@ namespace gbe
MOV(GenRegister::retype(dest, GEN_TYPE_HF), GenRegister::retype(src0, GEN_TYPE_F));
}
unsigned Gen8Encoder::setAtomicMessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum) {
- Gen8NativeInstruction *gen8_insn = &insn->gen8_insn;
+ Gen7NativeInstruction *gen8_insn = &insn->gen7_insn;
uint32_t msg_length = 0;
uint32_t response_length = 0;
@@ -168,6 +168,48 @@ namespace gbe
this->setSrc1(insn, bti);
}
}
+
+ unsigned Gen8Encoder::setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum) { // Fill in the A64 untyped-atomic message descriptor of insn and return the raw descriptor dword.
+ Gen8NativeInstruction *gen8_insn = &insn->gen8_insn;
+ uint32_t msg_length = 0;
+ uint32_t response_length = 0;
+
+ if (this->curr.execWidth == 8) {
+ msg_length = srcNum + 1; // presumably one extra register because the 64-bit addresses span two - TODO confirm
+ response_length = 1;
+ } else if (this->curr.execWidth == 16) {
+ msg_length = 2 * (srcNum + 1); // twice the SIMD8 message and response lengths
+ response_length = 2;
+ } else // any other execution width is unsupported
+ NOT_IMPLEMENTED;
+
+ const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA;
+ setMessageDescriptor(insn, sfid, msg_length, response_length);
+ gen8_insn->bits3.gen8_atomic_a64.msg_type = GEN8_P1_UNTYPED_ATOMIC_A64;
+ gen8_insn->bits3.gen8_atomic_a64.bti = bti;
+ gen8_insn->bits3.gen8_atomic_a64.return_data = 1; // always request the returned data
+ gen8_insn->bits3.gen8_atomic_a64.aop_type = function; // atomic operation code
+ gen8_insn->bits3.gen8_atomic_a64.data_size = 0; // NOTE(review): 0 presumably selects 32-bit data - confirm against the PRM
+
+ return gen8_insn->bits3.ud;
+ }
+
+ void Gen8Encoder::ATOMICA64(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum) { // Encode one SEND carrying an A64 untyped atomic; bti is either an immediate or a register.
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+
+ this->setHeader(insn);
+ insn->header.destreg_or_condmod = GEN_SFID_DATAPORT_DATA; // NOTE(review): setAtomicA64MessageDesc targets GEN_SFID_DATAPORT1_DATA - confirm this SFID is intended for the register-BTI path
+
+ this->setDst(insn, GenRegister::uw16grf(dst.nr, 0)); // only the register numbers of dst/src are used
+ this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
+ if (bti.file == GEN_IMMEDIATE_VALUE) { // immediate BTI: build the descriptor here
+ this->setSrc1(insn, GenRegister::immud(0));
+ setAtomicA64MessageDesc(insn, function, bti.value.ud, srcNum);
+ } else { // register BTI: the register is passed as src1 and no descriptor is built here
+ this->setSrc1(insn, bti);
+ }
+ }
+
unsigned Gen8Encoder::setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum) {
uint32_t msg_length = 0;
uint32_t response_length = 0;
diff --git a/backend/src/backend/gen8_encoder.hpp b/backend/src/backend/gen8_encoder.hpp
index 8b74278..3e23df6 100644
--- a/backend/src/backend/gen8_encoder.hpp
+++ b/backend/src/backend/gen8_encoder.hpp
@@ -46,6 +46,7 @@ namespace gbe
virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value);
virtual void LOAD_INT64_IMM(GenRegister dest, GenRegister value);
virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum);
+ virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum);
virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum);
virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t elemNum);
virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t elemNum);
@@ -68,6 +69,7 @@ namespace gbe
GenRegister src1 = GenRegister::null());
virtual void handleDouble(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1 = GenRegister::null());
virtual unsigned setAtomicMessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum);
+ virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum);
virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
void setSrc0WithAcc(GenNativeInstruction *insn, GenRegister reg, uint32_t accN);
diff --git a/backend/src/backend/gen8_instruction.hpp b/backend/src/backend/gen8_instruction.hpp
index 1b5dafc..452517c 100644
--- a/backend/src/backend/gen8_instruction.hpp
+++ b/backend/src/backend/gen8_instruction.hpp
@@ -555,16 +555,15 @@ union Gen8NativeInstruction
struct {
uint32_t bti:8;
uint32_t aop_type:4;
- uint32_t simd_mode:1;
+ uint32_t data_size:1;
uint32_t return_data:1;
- uint32_t msg_type:4;
- uint32_t category:1;
+ uint32_t msg_type:5;
uint32_t header_present:1;
uint32_t response_length:5;
uint32_t msg_length:4;
uint32_t pad3:2;
uint32_t end_of_thread:1;
- } gen7_atomic_op;
+ } gen8_atomic_a64;
// gen8 untyped read/write
struct {
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index cef4e4c..05359af 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2250,6 +2250,9 @@ namespace gbe
void GenContext::emitWrite64A64Instruction(const SelectionInstruction &insn) {
assert(0);
}
+ void GenContext::emitAtomicA64Instruction(const SelectionInstruction &insn) { // Base stub: always asserts; the working implementation is Gen8Context::emitAtomicA64Instruction.
+ assert(0);
+ }
void GenContext::emitUnpackByteInstruction(const SelectionInstruction &insn) {
const GenRegister src = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 30e1ab0..f050548 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -158,6 +158,7 @@ namespace gbe
virtual void emitWrite64Instruction(const SelectionInstruction &insn);
virtual void emitRead64A64Instruction(const SelectionInstruction &insn);
virtual void emitWrite64A64Instruction(const SelectionInstruction &insn);
+ virtual void emitAtomicA64Instruction(const SelectionInstruction &insn);
void emitUntypedReadInstruction(const SelectionInstruction &insn);
void emitUntypedWriteInstruction(const SelectionInstruction &insn);
virtual void emitUntypedReadA64Instruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index fb43718..586c9a1 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -359,6 +359,7 @@ enum GenMessageTarget {
#define GEN8_P1_BYTE_GATHER_A64 16 //10000
#define GEN8_P1_UNTYPED_READ_A64 17 //10001
+#define GEN8_P1_UNTYPED_ATOMIC_A64 18 //10010
#define GEN8_P1_UNTYPED_WRITE_A64 25 //11001
#define GEN8_P1_BYTE_SCATTER_A64 26 //11010
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 7161d49..3f2fdbf 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -405,6 +405,10 @@ namespace gbe
assert(0);
}
+ void GenEncoder::ATOMICA64(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum) { // Base stub: always asserts; the working implementation is Gen8Encoder::ATOMICA64.
+ assert(0);
+ }
+
void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti, uint32_t elemNum) {
GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
assert(elemNum >= 1 || elemNum <= 4);
@@ -590,6 +594,10 @@ namespace gbe
NOT_SUPPORTED;
return insn->bits3.ud;
}
+ unsigned GenEncoder::setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum) { // Base stub: always asserts; the working implementation is Gen8Encoder::setAtomicA64MessageDesc.
+ GBE_ASSERT(0);
+ return 0; // placeholder return value after the assertion
+ }
void GenEncoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum) {
GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index f8d81c9..fb478d2 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -168,6 +168,8 @@ namespace gbe
void WAIT(void);
/*! Atomic instructions */
virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum);
+ /*! AtomicA64 instructions */
+ virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum);
/*! Untyped read (upto 4 channels) */
virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum);
/*! Untyped write (upto 4 channels) */
@@ -237,6 +239,7 @@ namespace gbe
unsigned msg_length, unsigned response_length,
bool header_present = false, bool end_of_thread = false);
virtual unsigned setAtomicMessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum);
+ virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum);
virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
unsigned setByteGatherMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemSize);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 15eac79..792014f 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -46,6 +46,7 @@ DECL_GEN7_SCHEDULE(TypedWrite, 80, 1, 1)
DECL_GEN7_SCHEDULE(SpillReg, 20, 1, 1)
DECL_GEN7_SCHEDULE(UnSpillReg, 160, 1, 1)
DECL_GEN7_SCHEDULE(Atomic, 80, 1, 1)
+DECL_GEN7_SCHEDULE(AtomicA64, 80, 1, 1)
DECL_GEN7_SCHEDULE(I64MUL, 20, 40, 20)
DECL_GEN7_SCHEDULE(I64SATADD, 20, 40, 20)
DECL_GEN7_SCHEDULE(I64SATSUB, 20, 40, 20)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index d19f985..7be305f 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -187,6 +187,7 @@ namespace gbe
this->opcode == SEL_OP_READ64 ||
this->opcode == SEL_OP_READ64A64 ||
this->opcode == SEL_OP_ATOMIC ||
+ this->opcode == SEL_OP_ATOMICA64 ||
this->opcode == SEL_OP_BYTE_GATHER ||
this->opcode == SEL_OP_BYTE_GATHERA64 ||
this->opcode == SEL_OP_SAMPLE ||
@@ -213,6 +214,7 @@ namespace gbe
this->opcode == SEL_OP_WRITE64 ||
this->opcode == SEL_OP_WRITE64A64 ||
this->opcode == SEL_OP_ATOMIC ||
+ this->opcode == SEL_OP_ATOMICA64 ||
this->opcode == SEL_OP_BYTE_SCATTER ||
this->opcode == SEL_OP_BYTE_SCATTERA64 ||
this->opcode == SEL_OP_TYPED_WRITE;
@@ -629,6 +631,8 @@ namespace gbe
void WAIT(void);
/*! Atomic instruction */
void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1, Reg src2, GenRegister bti, vector<GenRegister> temps);
+ /*! AtomicA64 instruction */
+ void ATOMICA64(Reg dst, uint32_t function, uint32_t srcNum, vector<GenRegister> src, GenRegister bti, vector<GenRegister> temps);
/*! Read 64 bits float/int array */
void READ64(Reg addr, const GenRegister *dst, const GenRegister *tmp, uint32_t elemNum, const GenRegister bti, bool native_long, vector<GenRegister> temps);
/*! Write 64 bits float/int array */
@@ -1304,6 +1308,38 @@ namespace gbe
vector->isSrc = 1;
}
+ void Selection::Opaque::ATOMICA64(Reg dst, uint32_t function, // Append a SEL_OP_ATOMICA64 selection instruction plus the vector describing its payload.
+ uint32_t msgPayload, vector<GenRegister> src, // msgPayload: how many entries of src form the payload
+ GenRegister bti, // immediate or register BTI, stored after the payload sources
+ vector<GenRegister> temps) { // extra destinations used by the register-BTI path
+ unsigned dstNum = 1 + temps.size();
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_ATOMICA64, dstNum, msgPayload + 1); // +1 source slot for the BTI
+
+ if (bti.file != GEN_IMMEDIATE_VALUE) { // register BTI: record flag 0.1 for the emitter to predicate on
+ insn->state.flag = 0;
+ insn->state.subFlag = 1;
+ }
+
+ insn->dst(0) = dst;
+ if(temps.size()) { // NOTE(review): reads temps[1] whenever temps is non-empty - assumes at least two entries, confirm against getBTITemps
+ insn->dst(1) = temps[0];
+ insn->dst(2) = temps[1];
+ }
+
+ for (uint32_t elemID = 0; elemID < msgPayload; ++elemID)
+ insn->src(elemID) = src[elemID];
+ insn->src(msgPayload) = bti;
+
+ insn->extra.function = function; // read back by the emitter as the atomic opcode
+ insn->extra.elem = msgPayload; // read back by the emitter as the payload count / BTI index
+
+ SelectionVector *vector = this->appendVector();
+ vector->regNum = msgPayload; //bti not included in SelectionVector
+ vector->offsetID = 0;
+ vector->reg = &insn->src(0);
+ vector->isSrc = 1;
+ }
+
void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0, 0); }
@@ -5481,6 +5517,76 @@ namespace gbe
this->opcodes.push_back(ir::Opcode(op));
}
+ /* Narrow a 64-bit address to 32 bits. Dataport messages cannot take a
+ * scalar register, so the result is copied into a newly allocated
+ * non-uniform DWORD register. */
+ GenRegister convertU64ToU32(Selection::Opaque &sel,
+ GenRegister addr) const {
+ GenRegister unpacked = GenRegister::retype(sel.unpacked_ud(addr.reg()), GEN_TYPE_UD); // view of addr obtained through unpacked_ud, typed as UD
+ GenRegister dst = sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32);
+ sel.MOV(dst, unpacked);
+ return dst;
+ }
+
+ void untypedAtomicA64Stateless(Selection::Opaque &sel, // Select a stateless (A64) untyped atomic for SIMD8 or SIMD16 execution.
+ const ir::AtomicInstruction &insn, // IR atomic; supplies the opcode and the address mode
+ unsigned msgPayload, // number of payload operands: address plus up to two sources (<= 3)
+ GenRegister dst, // destination of the atomic
+ GenRegister addr, // 32-bit or 64-bit address register
+ GenRegister src1,
+ GenRegister src2,
+ GenRegister bti) const {
+ using namespace ir;
+ GenRegister addrQ; // address widened to 64 bits
+ const AtomicOps atomicOp = insn.getAtomicOpcode();
+ GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp;
+ unsigned addrBytes = typeSize(addr.type);
+ GBE_ASSERT(msgPayload <= 3);
+
+ unsigned simdWidth = sel.curr.execWidth;
+ AddressMode AM = insn.getAddressMode();
+ if (addrBytes == 4) { // 32-bit address: widen it into a new QWORD register
+ addrQ = sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64);
+ sel.MOV(addrQ, addr);
+ } else { // already 64-bit: use it as is
+ addrQ = addr;
+ }
+
+ if (simdWidth == 8) {
+ vector<GenRegister> msgs;
+ msgs.push_back(addrQ); // fix: send the widened 64-bit address (same register as addr when addr is already 64-bit)
+ msgs.push_back(src1);
+ msgs.push_back(src2);
+ sel.ATOMICA64(dst, genAtomicOp, msgPayload, msgs, bti, sel.getBTITemps(AM));
+ } else if (simdWidth == 16) { // SIMD16 is issued as two SIMD8 messages that reuse the same payload registers
+ vector<GenRegister> msgs;
+ for (unsigned k = 0; k < msgPayload; k++) {
+ msgs.push_back(sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32));
+ }
+ sel.push();
+ /* first quarter */
+ sel.curr.execWidth = 8;
+ sel.curr.quarterControl = GEN_COMPRESSION_Q1;
+ sel.MOV(GenRegister::retype(msgs[0], GEN_TYPE_UL), GenRegister::Qn(addrQ, 0)); // msgs[0] is viewed as UL to hold this quarter's addresses
+ if(msgPayload > 1)
+ sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 0));
+ if(msgPayload > 2)
+ sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 0)); // src2 goes into the second part of msgs[1], presumably so it directly follows src1 in the payload - TODO confirm
+ sel.ATOMICA64(GenRegister::Qn(dst, 0), genAtomicOp, msgPayload, msgs, bti, sel.getBTITemps(AM));
+
+ /* second quarter */
+ sel.curr.execWidth = 8;
+ sel.curr.quarterControl = GEN_COMPRESSION_Q2;
+ sel.MOV(GenRegister::retype(msgs[0], GEN_TYPE_UL), GenRegister::Qn(addrQ, 1));
+ if(msgPayload > 1)
+ sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 1));
+ if(msgPayload > 2)
+ sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 1));
+ sel.ATOMICA64(GenRegister::Qn(dst, 1), genAtomicOp, msgPayload, msgs, bti, sel.getBTITemps(AM));
+ sel.pop();
+ }
+ }
+
INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
using namespace ir;
const ir::AtomicInstruction &insn = cast<ir::AtomicInstruction>(dag.insn);
@@ -5489,6 +5595,15 @@ namespace gbe
const AtomicOps atomicOp = insn.getAtomicOpcode();
unsigned srcNum = insn.getSrcNum();
unsigned msgPayload;
+ Register reg = insn.getAddressRegister();
+ GenRegister address = sel.selReg(reg, getType(sel.getRegisterFamily(reg)));
+ AddressSpace addrSpace = insn.getAddressSpace();
+ GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
+ insn.getAddressSpace() == MEM_PRIVATE ||
+ insn.getAddressSpace() == MEM_LOCAL ||
+ insn.getAddressSpace() == MEM_GENERIC ||
+ insn.getAddressSpace() == MEM_MIXED);
+ unsigned addrBytes = typeSize(address.type);
AddressMode AM = insn.getAddressMode();
if (AM == AM_DynamicBti) {
@@ -5508,7 +5623,16 @@ namespace gbe
if(msgPayload > 2) src2 = sel.selReg(insn.getSrc(2), TYPE_U32);
GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp;
- sel.ATOMIC(dst, genAtomicOp, msgPayload, src0, src1, src2, bti, sel.getBTITemps(AM));
+ if (AM == AM_DynamicBti) {
+ sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2, bti, sel.getBTITemps(AM));
+ } else if (addrSpace == ir::MEM_LOCAL) {
+ // stateless mode, local still use bti access
+ GenRegister addrDW = address;
+ if (addrBytes == 8)
+ addrDW = convertU64ToU32(sel, address);
+ sel.ATOMIC(dst, genAtomicOp, msgPayload, addrDW, src1, src2, GenRegister::immud(0xfe), sel.getBTITemps(AM));
+ } else
+ untypedAtomicA64Stateless(sel, insn, msgPayload, dst, address, src1, src2, bti);
markAllChildren(dag);
return true;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 1fbcb1a..f6ed284 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -40,6 +40,7 @@ DECL_SELECTION_IR(I64MUL, I64MULInstruction)
DECL_SELECTION_IR(I64DIV, I64DIVREMInstruction)
DECL_SELECTION_IR(I64REM, I64DIVREMInstruction)
DECL_SELECTION_IR(ATOMIC, AtomicInstruction)
+DECL_SELECTION_IR(ATOMICA64, AtomicA64Instruction)
DECL_SELECTION_IR(MACH, BinaryInstruction)
DECL_SELECTION_IR(CMP, CompareInstruction)
DECL_SELECTION_IR(I64CMP, I64CompareInstruction)
--
2.1.4
More information about the Beignet
mailing list