[Beignet] [patch v3 2/7] [OCL20] gbe: add AtomicA64 instructions with stateless access.
Song, Ruiling
ruiling.song at intel.com
Wed Jan 6 04:38:46 PST 2016
The patchset looks good.
Thanks!
Ruiling
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> xionghu.luo at intel.com
> Sent: Tuesday, January 5, 2016 11:28 PM
> To: beignet at lists.freedesktop.org
> Cc: Luo, Xionghu <xionghu.luo at intel.com>
> Subject: [Beignet] [patch v3 2/7] [OCL20] gbe: add AtomicA64 instructions with
> stateless access.
>
> From: Luo Xionghu <xionghu.luo at intel.com>
>
> Add SEL_OP_ATOMICA64 for gen8 instruction selection and add
> ATOMICA64 to the gen8 encoder accordingly, handling both simd8 and simd16
> usage. For local-type atomics, still use bti 254.
>
> v2: remove useless code in stateless A64 atomic; add missing static
> address mode handling; remove the flag setting since only dynamic address
> mode needs it.
> v3: add gen8_atomic_a64 field in Gen8NativeInstruction to be compatible
> with gen7_atomic_op.
> Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
> ---
> backend/src/backend/gen/gen_mesa_disasm.c | 2 +-
> backend/src/backend/gen8_context.cpp | 11 ++
> backend/src/backend/gen8_context.hpp | 1 +
> backend/src/backend/gen8_encoder.cpp | 38 ++++++
> backend/src/backend/gen8_encoder.hpp | 2 +
> backend/src/backend/gen8_instruction.hpp | 14 +++
> backend/src/backend/gen_context.cpp | 3 +
> backend/src/backend/gen_context.hpp | 1 +
> backend/src/backend/gen_defs.hpp | 1 +
> backend/src/backend/gen_encoder.cpp | 8 ++
> backend/src/backend/gen_encoder.hpp | 3 +
> .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
> backend/src/backend/gen_insn_selection.cpp | 133
> ++++++++++++++++++++-
> backend/src/backend/gen_insn_selection.hxx | 1 +
> 14 files changed, 212 insertions(+), 7 deletions(-)
>
> diff --git a/backend/src/backend/gen/gen_mesa_disasm.c
> b/backend/src/backend/gen/gen_mesa_disasm.c
> index 52dfcd6..82a7524 100644
> --- a/backend/src/backend/gen/gen_mesa_disasm.c
> +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> @@ -557,7 +557,7 @@ static int gen_version;
> #define UNTYPED_RW_SIMD_MODE(inst) GEN_BITS_FIELD(inst,
> bits3.gen7_untyped_rw.simd_mode)
> #define UNTYPED_RW_CATEGORY(inst) GEN_BITS_FIELD(inst,
> bits3.gen7_untyped_rw.category)
> #define UNTYPED_RW_MSG_TYPE(inst) GEN_BITS_FIELD(inst,
> bits3.gen7_untyped_rw.msg_type)
> -#define UNTYPED_RW_AOP_TYPE(inst) GEN_BITS_FIELD(inst,
> bits3.gen7_atomic_op.aop_type)
> +#define UNTYPED_RW_AOP_TYPE(inst) GEN_BITS_FIELD2(inst,
> bits3.gen7_atomic_op.aop_type, bits3.gen8_atomic_a64.aop_type)
> #define SCRATCH_RW_OFFSET(inst) GEN_BITS_FIELD(inst,
> bits3.gen7_scratch_rw.offset)
> #define SCRATCH_RW_BLOCK_SIZE(inst) GEN_BITS_FIELD(inst,
> bits3.gen7_scratch_rw.block_size)
> #define SCRATCH_RW_INVALIDATE_AFTER_READ(inst) GEN_BITS_FIELD(inst,
> bits3.gen7_scratch_rw.invalidate_after_read)
> diff --git a/backend/src/backend/gen8_context.cpp
> b/backend/src/backend/gen8_context.cpp
> index f666a20..d19b2c0 100644
> --- a/backend/src/backend/gen8_context.cpp
> +++ b/backend/src/backend/gen8_context.cpp
> @@ -1029,6 +1029,17 @@ namespace gbe
>
> p->UNTYPED_WRITEA64(addr, elemNum*2);
> }
> + void Gen8Context::emitAtomicA64Instruction(const SelectionInstruction
> &insn)
> + {
> + const GenRegister src = ra->genReg(insn.src(0));
> + const GenRegister dst = ra->genReg(insn.dst(0));
> + const uint32_t function = insn.extra.function;
> + unsigned srcNum = insn.extra.elem;
> + const GenRegister bti = ra->genReg(insn.src(srcNum));
> + GBE_ASSERT(bti.value.ud == 0xff);
> + p->ATOMICA64(dst, function, src, bti, srcNum);
> + }
> +
> void Gen8Context::emitPackLongInstruction(const SelectionInstruction &insn)
> {
> const GenRegister src = ra->genReg(insn.src(0));
> const GenRegister dst = ra->genReg(insn.dst(0));
> diff --git a/backend/src/backend/gen8_context.hpp
> b/backend/src/backend/gen8_context.hpp
> index dbee885..dbe5280 100644
> --- a/backend/src/backend/gen8_context.hpp
> +++ b/backend/src/backend/gen8_context.hpp
> @@ -74,6 +74,7 @@ namespace gbe
> virtual void emitRead64Instruction(const SelectionInstruction &insn);
> virtual void emitWrite64A64Instruction(const SelectionInstruction &insn);
> virtual void emitRead64A64Instruction(const SelectionInstruction &insn);
> + virtual void emitAtomicA64Instruction(const SelectionInstruction &insn);
> virtual void emitI64MULInstruction(const SelectionInstruction &insn);
> virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
>
> diff --git a/backend/src/backend/gen8_encoder.cpp
> b/backend/src/backend/gen8_encoder.cpp
> index ee5e6ee..d320290 100644
> --- a/backend/src/backend/gen8_encoder.cpp
> +++ b/backend/src/backend/gen8_encoder.cpp
> @@ -168,6 +168,44 @@ namespace gbe
> this->setSrc1(insn, bti);
> }
> }
> +
> + unsigned Gen8Encoder::setAtomicA64MessageDesc(GenNativeInstruction
> *insn, unsigned function, unsigned bti, unsigned srcNum) {
> + Gen8NativeInstruction *gen8_insn = &insn->gen8_insn;
> + uint32_t msg_length = 0;
> + uint32_t response_length = 0;
> +
> + if (this->curr.execWidth == 8) {
> + msg_length = srcNum + 1;
> + response_length = 1;
> + } else if (this->curr.execWidth == 16) {
> + msg_length = 2 * (srcNum + 1);
> + response_length = 2;
> + } else
> + NOT_IMPLEMENTED;
> +
> + const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA;
> + setMessageDescriptor(insn, sfid, msg_length, response_length);
> + gen8_insn->bits3.gen8_atomic_a64.msg_type =
> GEN8_P1_UNTYPED_ATOMIC_A64;
> + gen8_insn->bits3.gen8_atomic_a64.bti = bti;
> + gen8_insn->bits3.gen8_atomic_a64.return_data = 1;
> + gen8_insn->bits3.gen8_atomic_a64.aop_type = function;
> + gen8_insn->bits3.gen8_atomic_a64.data_size = 0;
> +
> + return gen8_insn->bits3.ud;
> + }
> +
> + void Gen8Encoder::ATOMICA64(GenRegister dst, uint32_t function,
> GenRegister src, GenRegister bti, uint32_t srcNum) {
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> +
> + this->setHeader(insn);
> + insn->header.destreg_or_condmod = GEN_SFID_DATAPORT_DATA;
> +
> + this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
> + this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
> + this->setSrc1(insn, GenRegister::immud(0));
> + setAtomicA64MessageDesc(insn, function, bti.value.ud, srcNum);
> + }
> +
> unsigned Gen8Encoder::setUntypedReadMessageDesc(GenNativeInstruction
> *insn, unsigned bti, unsigned elemNum) {
> uint32_t msg_length = 0;
> uint32_t response_length = 0;
> diff --git a/backend/src/backend/gen8_encoder.hpp
> b/backend/src/backend/gen8_encoder.hpp
> index 8b74278..3e23df6 100644
> --- a/backend/src/backend/gen8_encoder.hpp
> +++ b/backend/src/backend/gen8_encoder.hpp
> @@ -46,6 +46,7 @@ namespace gbe
> virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value);
> virtual void LOAD_INT64_IMM(GenRegister dest, GenRegister value);
> virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src,
> GenRegister bti, uint32_t srcNum);
> + virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister src,
> GenRegister bti, uint32_t srcNum);
> virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister
> bti, uint32_t elemNum);
> virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t
> elemNum);
> virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t
> elemNum);
> @@ -68,6 +69,7 @@ namespace gbe
> GenRegister src1 = GenRegister::null());
> virtual void handleDouble(GenEncoder *p, uint32_t opcode, GenRegister dst,
> GenRegister src0, GenRegister src1 = GenRegister::null());
> virtual unsigned setAtomicMessageDesc(GenNativeInstruction *insn,
> unsigned function, unsigned bti, unsigned srcNum);
> + virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn,
> unsigned function, unsigned bti, unsigned srcNum);
> virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn,
> unsigned bti, unsigned elemNum);
> virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn,
> unsigned bti, unsigned elemNum);
> void setSrc0WithAcc(GenNativeInstruction *insn, GenRegister reg, uint32_t
> accN);
> diff --git a/backend/src/backend/gen8_instruction.hpp
> b/backend/src/backend/gen8_instruction.hpp
> index 1b5dafc..b45376d 100644
> --- a/backend/src/backend/gen8_instruction.hpp
> +++ b/backend/src/backend/gen8_instruction.hpp
> @@ -566,6 +566,20 @@ union Gen8NativeInstruction
> uint32_t end_of_thread:1;
> } gen7_atomic_op;
>
> + /*! atomic a64 messages */
> + struct {
> + uint32_t bti:8;
> + uint32_t aop_type:4;
> + uint32_t data_size:1;
> + uint32_t return_data:1;
> + uint32_t msg_type:5;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad3:2;
> + uint32_t end_of_thread:1;
> + } gen8_atomic_a64;
> +
> // gen8 untyped read/write
> struct {
> uint32_t bti:8;
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index cef4e4c..05359af 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -2250,6 +2250,9 @@ namespace gbe
> void GenContext::emitWrite64A64Instruction(const SelectionInstruction &insn)
> {
> assert(0);
> }
> + void GenContext::emitAtomicA64Instruction(const SelectionInstruction &insn)
> {
> + assert(0);
> + }
>
> void GenContext::emitUnpackByteInstruction(const SelectionInstruction &insn)
> {
> const GenRegister src = ra->genReg(insn.src(0));
> diff --git a/backend/src/backend/gen_context.hpp
> b/backend/src/backend/gen_context.hpp
> index 30e1ab0..f050548 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -158,6 +158,7 @@ namespace gbe
> virtual void emitWrite64Instruction(const SelectionInstruction &insn);
> virtual void emitRead64A64Instruction(const SelectionInstruction &insn);
> virtual void emitWrite64A64Instruction(const SelectionInstruction &insn);
> + virtual void emitAtomicA64Instruction(const SelectionInstruction &insn);
> void emitUntypedReadInstruction(const SelectionInstruction &insn);
> void emitUntypedWriteInstruction(const SelectionInstruction &insn);
> virtual void emitUntypedReadA64Instruction(const SelectionInstruction
> &insn);
> diff --git a/backend/src/backend/gen_defs.hpp
> b/backend/src/backend/gen_defs.hpp
> index fb43718..586c9a1 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -359,6 +359,7 @@ enum GenMessageTarget {
>
> #define GEN8_P1_BYTE_GATHER_A64 16 //10000
> #define GEN8_P1_UNTYPED_READ_A64 17 //10001
> +#define GEN8_P1_UNTYPED_ATOMIC_A64 18 //10010
> #define GEN8_P1_UNTYPED_WRITE_A64 25 //11001
> #define GEN8_P1_BYTE_SCATTER_A64 26 //11010
>
> diff --git a/backend/src/backend/gen_encoder.cpp
> b/backend/src/backend/gen_encoder.cpp
> index 7161d49..3f2fdbf 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -405,6 +405,10 @@ namespace gbe
> assert(0);
> }
>
> + void GenEncoder::ATOMICA64(GenRegister dst, uint32_t function,
> GenRegister src, GenRegister bti, uint32_t srcNum) {
> + assert(0);
> + }
> +
> void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti,
> uint32_t elemNum) {
> GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> assert(elemNum >= 1 || elemNum <= 4);
> @@ -590,6 +594,10 @@ namespace gbe
> NOT_SUPPORTED;
> return insn->bits3.ud;
> }
> + unsigned GenEncoder::setAtomicA64MessageDesc(GenNativeInstruction
> *insn, unsigned function, unsigned bti, unsigned srcNum) {
> + GBE_ASSERT(0);
> + return 0;
> + }
>
> void GenEncoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src,
> GenRegister bti, uint32_t srcNum) {
> GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> diff --git a/backend/src/backend/gen_encoder.hpp
> b/backend/src/backend/gen_encoder.hpp
> index f8d81c9..fb478d2 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -168,6 +168,8 @@ namespace gbe
> void WAIT(void);
> /*! Atomic instructions */
> virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src,
> GenRegister bti, uint32_t srcNum);
> + /*! AtomicA64 instructions */
> + virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister src,
> GenRegister bti, uint32_t srcNum);
> /*! Untyped read (upto 4 channels) */
> virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister
> bti, uint32_t elemNum);
> /*! Untyped write (upto 4 channels) */
> @@ -237,6 +239,7 @@ namespace gbe
> unsigned msg_length, unsigned response_length,
> bool header_present = false, bool end_of_thread = false);
> virtual unsigned setAtomicMessageDesc(GenNativeInstruction *insn,
> unsigned function, unsigned bti, unsigned srcNum);
> + virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn,
> unsigned function, unsigned bti, unsigned srcNum);
> virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn,
> unsigned bti, unsigned elemNum);
> virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn,
> unsigned bti, unsigned elemNum);
> unsigned setByteGatherMessageDesc(GenNativeInstruction *insn, unsigned
> bti, unsigned elemSize);
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index 15eac79..792014f 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -46,6 +46,7 @@ DECL_GEN7_SCHEDULE(TypedWrite, 80, 1, 1)
> DECL_GEN7_SCHEDULE(SpillReg, 20, 1, 1)
> DECL_GEN7_SCHEDULE(UnSpillReg, 160, 1, 1)
> DECL_GEN7_SCHEDULE(Atomic, 80, 1, 1)
> +DECL_GEN7_SCHEDULE(AtomicA64, 80, 1, 1)
> DECL_GEN7_SCHEDULE(I64MUL, 20, 40, 20)
> DECL_GEN7_SCHEDULE(I64SATADD, 20, 40, 20)
> DECL_GEN7_SCHEDULE(I64SATSUB, 20, 40, 20)
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index d19f985..09de170 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -187,6 +187,7 @@ namespace gbe
> this->opcode == SEL_OP_READ64 ||
> this->opcode == SEL_OP_READ64A64 ||
> this->opcode == SEL_OP_ATOMIC ||
> + this->opcode == SEL_OP_ATOMICA64 ||
> this->opcode == SEL_OP_BYTE_GATHER ||
> this->opcode == SEL_OP_BYTE_GATHERA64 ||
> this->opcode == SEL_OP_SAMPLE ||
> @@ -213,6 +214,7 @@ namespace gbe
> this->opcode == SEL_OP_WRITE64 ||
> this->opcode == SEL_OP_WRITE64A64 ||
> this->opcode == SEL_OP_ATOMIC ||
> + this->opcode == SEL_OP_ATOMICA64 ||
> this->opcode == SEL_OP_BYTE_SCATTER ||
> this->opcode == SEL_OP_BYTE_SCATTERA64 ||
> this->opcode == SEL_OP_TYPED_WRITE;
> @@ -629,6 +631,8 @@ namespace gbe
> void WAIT(void);
> /*! Atomic instruction */
> void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1,
> Reg src2, GenRegister bti, vector<GenRegister> temps);
> + /*! AtomicA64 instruction */
> + void ATOMICA64(Reg dst, uint32_t function, uint32_t srcNum,
> vector<GenRegister> src, GenRegister bti, vector<GenRegister> temps);
> /*! Read 64 bits float/int array */
> void READ64(Reg addr, const GenRegister *dst, const GenRegister *tmp,
> uint32_t elemNum, const GenRegister bti, bool native_long, vector<GenRegister>
> temps);
> /*! Write 64 bits float/int array */
> @@ -1304,6 +1308,33 @@ namespace gbe
> vector->isSrc = 1;
> }
>
> + void Selection::Opaque::ATOMICA64(Reg dst, uint32_t function,
> + uint32_t msgPayload, vector<GenRegister> src,
> + GenRegister bti,
> + vector<GenRegister> temps) {
> + unsigned dstNum = 1 + temps.size();
> + SelectionInstruction *insn = this->appendInsn(SEL_OP_ATOMICA64, dstNum,
> msgPayload + 1);
> +
> + insn->dst(0) = dst;
> + if(temps.size()) {
> + insn->dst(1) = temps[0];
> + insn->dst(2) = temps[1];
> + }
> +
> + for (uint32_t elemID = 0; elemID < msgPayload; ++elemID)
> + insn->src(elemID) = src[elemID];
> + insn->src(msgPayload) = bti;
> +
> + insn->extra.function = function;
> + insn->extra.elem = msgPayload;
> +
> + SelectionVector *vector = this->appendVector();
> + vector->regNum = msgPayload; //bti not included in SelectionVector
> + vector->offsetID = 0;
> + vector->reg = &insn->src(0);
> + vector->isSrc = 1;
> + }
> +
> void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
> void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
> void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0, 0); }
> @@ -5481,34 +5512,124 @@ namespace gbe
> this->opcodes.push_back(ir::Opcode(op));
> }
>
> + /* Used to convert an address from 64-bit to 32-bit. Note that dataport
> + * messages cannot accept a scalar register, so the address is moved into
> + * a non-uniform register here. */
> + GenRegister convertU64ToU32(Selection::Opaque &sel,
> + GenRegister addr) const {
> + GenRegister unpacked = GenRegister::retype(sel.unpacked_ud(addr.reg()),
> GEN_TYPE_UD);
> + GenRegister dst = sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32);
> + sel.MOV(dst, unpacked);
> + return dst;
> + }
> +
> + void untypedAtomicA64Stateless(Selection::Opaque &sel,
> + const ir::AtomicInstruction &insn,
> + unsigned msgPayload,
> + GenRegister dst,
> + GenRegister addr,
> + GenRegister src1,
> + GenRegister src2,
> + GenRegister bti) const {
> + using namespace ir;
> + GenRegister addrQ;
> + const AtomicOps atomicOp = insn.getAtomicOpcode();
> + GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp;
> + unsigned addrBytes = typeSize(addr.type);
> + GBE_ASSERT(msgPayload <= 3);
> +
> + unsigned simdWidth = sel.curr.execWidth;
> + AddressMode AM = insn.getAddressMode();
> + if (addrBytes == 4) {
> + addrQ = sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64);
> + sel.MOV(addrQ, addr);
> + } else {
> + addrQ = addr;
> + }
> +
> + if (simdWidth == 8) {
> + vector<GenRegister> msgs;
> + msgs.push_back(addr);
> + msgs.push_back(src1);
> + msgs.push_back(src2);
> + sel.ATOMICA64(dst, genAtomicOp, msgPayload, msgs, bti,
> sel.getBTITemps(AM));
> + } else if (simdWidth == 16) {
> + vector<GenRegister> msgs;
> + for (unsigned k = 0; k < msgPayload; k++) {
> + msgs.push_back(sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32));
> + }
> + sel.push();
> + /* first quarter */
> + sel.curr.execWidth = 8;
> + sel.curr.quarterControl = GEN_COMPRESSION_Q1;
> + sel.MOV(GenRegister::retype(msgs[0], GEN_TYPE_UL),
> GenRegister::Qn(addrQ, 0));
> + if(msgPayload > 1)
> + sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 0));
> + if(msgPayload > 2)
> + sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 0));
> + sel.ATOMICA64(GenRegister::Qn(dst, 0), genAtomicOp, msgPayload, msgs,
> bti, sel.getBTITemps(AM));
> +
> + /* second quarter */
> + sel.curr.execWidth = 8;
> + sel.curr.quarterControl = GEN_COMPRESSION_Q2;
> + sel.MOV(GenRegister::retype(msgs[0], GEN_TYPE_UL),
> GenRegister::Qn(addrQ, 1));
> + if(msgPayload > 1)
> + sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 1));
> + if(msgPayload > 2)
> + sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 1));
> + sel.ATOMICA64(GenRegister::Qn(dst, 1), genAtomicOp, msgPayload, msgs,
> bti, sel.getBTITemps(AM));
> + sel.pop();
> + }
> + }
> +
> INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
> using namespace ir;
> const ir::AtomicInstruction &insn = cast<ir::AtomicInstruction>(dag.insn);
>
> - ir::BTI b;
> const AtomicOps atomicOp = insn.getAtomicOpcode();
> unsigned srcNum = insn.getSrcNum();
> unsigned msgPayload;
> + Register reg = insn.getAddressRegister();
> + GenRegister address = sel.selReg(reg, getType(sel.getRegisterFamily(reg)));
> + AddressSpace addrSpace = insn.getAddressSpace();
> + GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
> + insn.getAddressSpace() == MEM_PRIVATE ||
> + insn.getAddressSpace() == MEM_LOCAL ||
> + insn.getAddressSpace() == MEM_GENERIC ||
> + insn.getAddressSpace() == MEM_MIXED);
> + unsigned addrBytes = typeSize(address.type);
>
> AddressMode AM = insn.getAddressMode();
> if (AM == AM_DynamicBti) {
> - b.reg = insn.getBtiReg();
> msgPayload = srcNum - 1;
> } else {
> - b.imm = insn.getSurfaceIndex();
> - b.isConst = 1;
> msgPayload = srcNum;
> }
>
> GenRegister dst = sel.selReg(insn.getDst(0), TYPE_U32);
> - GenRegister bti = b.isConst ? GenRegister::immud(b.imm) : sel.selReg(b.reg,
> ir::TYPE_U32);
> GenRegister src0 = sel.selReg(insn.getAddressRegister(), TYPE_U32);
> GenRegister src1 = src0, src2 = src0;
> if(msgPayload > 1) src1 = sel.selReg(insn.getSrc(1), TYPE_U32);
> if(msgPayload > 2) src2 = sel.selReg(insn.getSrc(2), TYPE_U32);
>
> GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp;
> - sel.ATOMIC(dst, genAtomicOp, msgPayload, src0, src1, src2, bti,
> sel.getBTITemps(AM));
> + if (AM == AM_DynamicBti || AM == AM_StaticBti) {
> + if (AM == AM_DynamicBti) {
> + Register btiReg = insn.getBtiReg();
> + sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2,
> sel.selReg(btiReg, TYPE_U32), sel.getBTITemps(AM));
> + } else {
> + unsigned SI = insn.getSurfaceIndex();
> + sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2,
> GenRegister::immud(SI), sel.getBTITemps(AM));
> + }
> + } else if (addrSpace == ir::MEM_LOCAL) {
> + // stateless mode: local memory still uses bti access
> + GenRegister addrDW = address;
> + if (addrBytes == 8)
> + addrDW = convertU64ToU32(sel, address);
> + sel.ATOMIC(dst, genAtomicOp, msgPayload, addrDW, src1, src2,
> GenRegister::immud(0xfe), sel.getBTITemps(AM));
> + }
> + else
> + untypedAtomicA64Stateless(sel, insn, msgPayload, dst, address, src1, src2,
> GenRegister::immud(0xff));
>
> markAllChildren(dag);
> return true;
> diff --git a/backend/src/backend/gen_insn_selection.hxx
> b/backend/src/backend/gen_insn_selection.hxx
> index 1fbcb1a..f6ed284 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -40,6 +40,7 @@ DECL_SELECTION_IR(I64MUL, I64MULInstruction)
> DECL_SELECTION_IR(I64DIV, I64DIVREMInstruction)
> DECL_SELECTION_IR(I64REM, I64DIVREMInstruction)
> DECL_SELECTION_IR(ATOMIC, AtomicInstruction)
> +DECL_SELECTION_IR(ATOMICA64, AtomicA64Instruction)
> DECL_SELECTION_IR(MACH, BinaryInstruction)
> DECL_SELECTION_IR(CMP, CompareInstruction)
> DECL_SELECTION_IR(I64CMP, I64CompareInstruction)
> --
> 2.1.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list