[Beignet] [patch v3 2/7] [OCL20] gbe: add AtomicA64 instructions with stateless access.

Song, Ruiling ruiling.song at intel.com
Wed Jan 6 04:38:46 PST 2016


The patchset looks good.

Thanks!
Ruiling

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> xionghu.luo at intel.com
> Sent: Tuesday, January 5, 2016 11:28 PM
> To: beignet at lists.freedesktop.org
> Cc: Luo, Xionghu <xionghu.luo at intel.com>
> Subject: [Beignet] [patch v3 2/7] [OCL20] gbe: add AtomicA64 instructions with
> stateless access.
> 
> From: Luo Xionghu <xionghu.luo at intel.com>
> 
> Add SEL_OP_ATOMICA64 for gen8 instruction selection and add
> ATOMICA64 for the gen8 encoder accordingly, handling both simd8 and simd16
> usage. For local-type atomics, still use bti 254.
> 
> v2: remove useless code in stateless A64 atomic; add missing static
> address mode processing; remove flag set since only dynamic address mode
> needs it.
> v3: add gen8_atomic_a64 field in Gen8NativeInstruction to be compatible
> with gen7_atomic_op.
> Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
> ---
>  backend/src/backend/gen/gen_mesa_disasm.c          |   2 +-
>  backend/src/backend/gen8_context.cpp               |  11 ++
>  backend/src/backend/gen8_context.hpp               |   1 +
>  backend/src/backend/gen8_encoder.cpp               |  38 ++++++
>  backend/src/backend/gen8_encoder.hpp               |   2 +
>  backend/src/backend/gen8_instruction.hpp           |  14 +++
>  backend/src/backend/gen_context.cpp                |   3 +
>  backend/src/backend/gen_context.hpp                |   1 +
>  backend/src/backend/gen_defs.hpp                   |   1 +
>  backend/src/backend/gen_encoder.cpp                |   8 ++
>  backend/src/backend/gen_encoder.hpp                |   3 +
>  .../src/backend/gen_insn_gen7_schedule_info.hxx    |   1 +
>  backend/src/backend/gen_insn_selection.cpp         | 133
> ++++++++++++++++++++-
>  backend/src/backend/gen_insn_selection.hxx         |   1 +
>  14 files changed, 212 insertions(+), 7 deletions(-)
> 
> diff --git a/backend/src/backend/gen/gen_mesa_disasm.c
> b/backend/src/backend/gen/gen_mesa_disasm.c
> index 52dfcd6..82a7524 100644
> --- a/backend/src/backend/gen/gen_mesa_disasm.c
> +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> @@ -557,7 +557,7 @@ static int gen_version;
>  #define UNTYPED_RW_SIMD_MODE(inst) GEN_BITS_FIELD(inst,
> bits3.gen7_untyped_rw.simd_mode)
>  #define UNTYPED_RW_CATEGORY(inst)  GEN_BITS_FIELD(inst,
> bits3.gen7_untyped_rw.category)
>  #define UNTYPED_RW_MSG_TYPE(inst)  GEN_BITS_FIELD(inst,
> bits3.gen7_untyped_rw.msg_type)
> -#define UNTYPED_RW_AOP_TYPE(inst)  GEN_BITS_FIELD(inst,
> bits3.gen7_atomic_op.aop_type)
> +#define UNTYPED_RW_AOP_TYPE(inst)  GEN_BITS_FIELD2(inst,
> bits3.gen7_atomic_op.aop_type, bits3.gen8_atomic_a64.aop_type)
>  #define SCRATCH_RW_OFFSET(inst)    GEN_BITS_FIELD(inst,
> bits3.gen7_scratch_rw.offset)
>  #define SCRATCH_RW_BLOCK_SIZE(inst) GEN_BITS_FIELD(inst,
> bits3.gen7_scratch_rw.block_size)
>  #define SCRATCH_RW_INVALIDATE_AFTER_READ(inst) GEN_BITS_FIELD(inst,
> bits3.gen7_scratch_rw.invalidate_after_read)
> diff --git a/backend/src/backend/gen8_context.cpp
> b/backend/src/backend/gen8_context.cpp
> index f666a20..d19b2c0 100644
> --- a/backend/src/backend/gen8_context.cpp
> +++ b/backend/src/backend/gen8_context.cpp
> @@ -1029,6 +1029,17 @@ namespace gbe
> 
>      p->UNTYPED_WRITEA64(addr, elemNum*2);
>    }
> +  void Gen8Context::emitAtomicA64Instruction(const SelectionInstruction
> &insn)
> +  {
> +    const GenRegister src = ra->genReg(insn.src(0));
> +    const GenRegister dst = ra->genReg(insn.dst(0));
> +    const uint32_t function = insn.extra.function;
> +    unsigned srcNum = insn.extra.elem;
> +    const GenRegister bti = ra->genReg(insn.src(srcNum));
> +    GBE_ASSERT(bti.value.ud == 0xff);
> +    p->ATOMICA64(dst, function, src, bti, srcNum);
> +  }
> +
>    void Gen8Context::emitPackLongInstruction(const SelectionInstruction &insn)
> {
>      const GenRegister src = ra->genReg(insn.src(0));
>      const GenRegister dst = ra->genReg(insn.dst(0));
> diff --git a/backend/src/backend/gen8_context.hpp
> b/backend/src/backend/gen8_context.hpp
> index dbee885..dbe5280 100644
> --- a/backend/src/backend/gen8_context.hpp
> +++ b/backend/src/backend/gen8_context.hpp
> @@ -74,6 +74,7 @@ namespace gbe
>      virtual void emitRead64Instruction(const SelectionInstruction &insn);
>      virtual void emitWrite64A64Instruction(const SelectionInstruction &insn);
>      virtual void emitRead64A64Instruction(const SelectionInstruction &insn);
> +    virtual void emitAtomicA64Instruction(const SelectionInstruction &insn);
>      virtual void emitI64MULInstruction(const SelectionInstruction &insn);
>      virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
> 
> diff --git a/backend/src/backend/gen8_encoder.cpp
> b/backend/src/backend/gen8_encoder.cpp
> index ee5e6ee..d320290 100644
> --- a/backend/src/backend/gen8_encoder.cpp
> +++ b/backend/src/backend/gen8_encoder.cpp
> @@ -168,6 +168,44 @@ namespace gbe
>        this->setSrc1(insn, bti);
>      }
>    }
> +
> +  unsigned Gen8Encoder::setAtomicA64MessageDesc(GenNativeInstruction
> *insn, unsigned function, unsigned bti, unsigned srcNum) {
> +    Gen8NativeInstruction *gen8_insn = &insn->gen8_insn;
> +    uint32_t msg_length = 0;
> +    uint32_t response_length = 0;
> +
> +    if (this->curr.execWidth == 8) {
> +      msg_length = srcNum + 1;
> +      response_length = 1;
> +    } else if (this->curr.execWidth == 16) {
> +      msg_length = 2 * (srcNum + 1);
> +      response_length = 2;
> +    } else
> +      NOT_IMPLEMENTED;
> +
> +    const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA;
> +    setMessageDescriptor(insn, sfid, msg_length, response_length);
> +    gen8_insn->bits3.gen8_atomic_a64.msg_type =
> GEN8_P1_UNTYPED_ATOMIC_A64;
> +    gen8_insn->bits3.gen8_atomic_a64.bti = bti;
> +    gen8_insn->bits3.gen8_atomic_a64.return_data = 1;
> +    gen8_insn->bits3.gen8_atomic_a64.aop_type = function;
> +    gen8_insn->bits3.gen8_atomic_a64.data_size = 0;
> +
> +    return gen8_insn->bits3.ud;
> +  }
> +
> +  void Gen8Encoder::ATOMICA64(GenRegister dst, uint32_t function,
> GenRegister src, GenRegister bti, uint32_t srcNum) {
> +    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> +
> +    this->setHeader(insn);
> +    insn->header.destreg_or_condmod = GEN_SFID_DATAPORT_DATA;
> +
> +    this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
> +    this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
> +    this->setSrc1(insn, GenRegister::immud(0));
> +    setAtomicA64MessageDesc(insn, function, bti.value.ud, srcNum);
> +  }
> +
>    unsigned Gen8Encoder::setUntypedReadMessageDesc(GenNativeInstruction
> *insn, unsigned bti, unsigned elemNum) {
>      uint32_t msg_length = 0;
>      uint32_t response_length = 0;
> diff --git a/backend/src/backend/gen8_encoder.hpp
> b/backend/src/backend/gen8_encoder.hpp
> index 8b74278..3e23df6 100644
> --- a/backend/src/backend/gen8_encoder.hpp
> +++ b/backend/src/backend/gen8_encoder.hpp
> @@ -46,6 +46,7 @@ namespace gbe
>      virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value);
>      virtual void LOAD_INT64_IMM(GenRegister dest, GenRegister value);
>      virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src,
> GenRegister bti, uint32_t srcNum);
> +    virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister src,
> GenRegister bti, uint32_t srcNum);
>      virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister
> bti, uint32_t elemNum);
>      virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t
> elemNum);
>      virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t
> elemNum);
> @@ -68,6 +69,7 @@ namespace gbe
>                              GenRegister src1 = GenRegister::null());
>      virtual void handleDouble(GenEncoder *p, uint32_t opcode, GenRegister dst,
> GenRegister src0, GenRegister src1 = GenRegister::null());
>      virtual unsigned setAtomicMessageDesc(GenNativeInstruction *insn,
> unsigned function, unsigned bti, unsigned srcNum);
> +    virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn,
> unsigned function, unsigned bti, unsigned srcNum);
>      virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn,
> unsigned bti, unsigned elemNum);
>      virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn,
> unsigned bti, unsigned elemNum);
>      void setSrc0WithAcc(GenNativeInstruction *insn, GenRegister reg, uint32_t
> accN);
> diff --git a/backend/src/backend/gen8_instruction.hpp
> b/backend/src/backend/gen8_instruction.hpp
> index 1b5dafc..b45376d 100644
> --- a/backend/src/backend/gen8_instruction.hpp
> +++ b/backend/src/backend/gen8_instruction.hpp
> @@ -566,6 +566,20 @@ union Gen8NativeInstruction
>          uint32_t end_of_thread:1;
>        } gen7_atomic_op;
> 
> +      /*! atomic a64 messages */
> +      struct {
> +        uint32_t bti:8;
> +        uint32_t aop_type:4;
> +        uint32_t data_size:1;
> +        uint32_t return_data:1;
> +        uint32_t msg_type:5;
> +        uint32_t header_present:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad3:2;
> +        uint32_t end_of_thread:1;
> +      } gen8_atomic_a64;
> +
>        // gen8 untyped read/write
>        struct {
>          uint32_t bti:8;
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index cef4e4c..05359af 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -2250,6 +2250,9 @@ namespace gbe
>    void GenContext::emitWrite64A64Instruction(const SelectionInstruction &insn)
> {
>      assert(0);
>    }
> +  void GenContext::emitAtomicA64Instruction(const SelectionInstruction &insn)
> {
> +    assert(0);
> +  }
> 
>    void GenContext::emitUnpackByteInstruction(const SelectionInstruction &insn)
> {
>      const GenRegister src = ra->genReg(insn.src(0));
> diff --git a/backend/src/backend/gen_context.hpp
> b/backend/src/backend/gen_context.hpp
> index 30e1ab0..f050548 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -158,6 +158,7 @@ namespace gbe
>      virtual void emitWrite64Instruction(const SelectionInstruction &insn);
>      virtual void emitRead64A64Instruction(const SelectionInstruction &insn);
>      virtual void emitWrite64A64Instruction(const SelectionInstruction &insn);
> +    virtual void emitAtomicA64Instruction(const SelectionInstruction &insn);
>      void emitUntypedReadInstruction(const SelectionInstruction &insn);
>      void emitUntypedWriteInstruction(const SelectionInstruction &insn);
>      virtual void emitUntypedReadA64Instruction(const SelectionInstruction
> &insn);
> diff --git a/backend/src/backend/gen_defs.hpp
> b/backend/src/backend/gen_defs.hpp
> index fb43718..586c9a1 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -359,6 +359,7 @@ enum GenMessageTarget {
> 
>  #define GEN8_P1_BYTE_GATHER_A64       16 //10000
>  #define GEN8_P1_UNTYPED_READ_A64      17 //10001
> +#define GEN8_P1_UNTYPED_ATOMIC_A64    18 //10010
>  #define GEN8_P1_UNTYPED_WRITE_A64     25 //11001
>  #define GEN8_P1_BYTE_SCATTER_A64      26 //11010
> 
> diff --git a/backend/src/backend/gen_encoder.cpp
> b/backend/src/backend/gen_encoder.cpp
> index 7161d49..3f2fdbf 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -405,6 +405,10 @@ namespace gbe
>      assert(0);
>    }
> 
> +  void GenEncoder::ATOMICA64(GenRegister dst, uint32_t function,
> GenRegister src, GenRegister bti, uint32_t srcNum) {
> +    assert(0);
> +  }
> +
>    void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti,
> uint32_t elemNum) {
>      GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
>      assert(elemNum >= 1 || elemNum <= 4);
> @@ -590,6 +594,10 @@ namespace gbe
>        NOT_SUPPORTED;
>      return insn->bits3.ud;
>    }
> +  unsigned GenEncoder::setAtomicA64MessageDesc(GenNativeInstruction
> *insn, unsigned function, unsigned bti, unsigned srcNum) {
> +    GBE_ASSERT(0);
> +    return 0;
> +  }
> 
>    void GenEncoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src,
> GenRegister bti, uint32_t srcNum) {
>      GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> diff --git a/backend/src/backend/gen_encoder.hpp
> b/backend/src/backend/gen_encoder.hpp
> index f8d81c9..fb478d2 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -168,6 +168,8 @@ namespace gbe
>      void WAIT(void);
>      /*! Atomic instructions */
>      virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src,
> GenRegister bti, uint32_t srcNum);
> +    /*! AtomicA64 instructions */
> +    virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister src,
> GenRegister bti, uint32_t srcNum);
>      /*! Untyped read (upto 4 channels) */
>      virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister
> bti, uint32_t elemNum);
>      /*! Untyped write (upto 4 channels) */
> @@ -237,6 +239,7 @@ namespace gbe
>                                unsigned msg_length, unsigned response_length,
>                                bool header_present = false, bool end_of_thread = false);
>      virtual unsigned setAtomicMessageDesc(GenNativeInstruction *insn,
> unsigned function, unsigned bti, unsigned srcNum);
> +    virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn,
> unsigned function, unsigned bti, unsigned srcNum);
>      virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn,
> unsigned bti, unsigned elemNum);
>      virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn,
> unsigned bti, unsigned elemNum);
>      unsigned setByteGatherMessageDesc(GenNativeInstruction *insn, unsigned
> bti, unsigned elemSize);
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index 15eac79..792014f 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -46,6 +46,7 @@ DECL_GEN7_SCHEDULE(TypedWrite,      80,        1,        1)
>  DECL_GEN7_SCHEDULE(SpillReg,        20,        1,        1)
>  DECL_GEN7_SCHEDULE(UnSpillReg,      160,       1,        1)
>  DECL_GEN7_SCHEDULE(Atomic,          80,        1,        1)
> +DECL_GEN7_SCHEDULE(AtomicA64,       80,        1,        1)
>  DECL_GEN7_SCHEDULE(I64MUL,          20,        40,      20)
>  DECL_GEN7_SCHEDULE(I64SATADD,       20,        40,      20)
>  DECL_GEN7_SCHEDULE(I64SATSUB,       20,        40,      20)
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index d19f985..09de170 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -187,6 +187,7 @@ namespace gbe
>             this->opcode == SEL_OP_READ64          ||
>             this->opcode == SEL_OP_READ64A64       ||
>             this->opcode == SEL_OP_ATOMIC          ||
> +           this->opcode == SEL_OP_ATOMICA64       ||
>             this->opcode == SEL_OP_BYTE_GATHER     ||
>             this->opcode == SEL_OP_BYTE_GATHERA64  ||
>             this->opcode == SEL_OP_SAMPLE          ||
> @@ -213,6 +214,7 @@ namespace gbe
>             this->opcode == SEL_OP_WRITE64          ||
>             this->opcode == SEL_OP_WRITE64A64       ||
>             this->opcode == SEL_OP_ATOMIC           ||
> +           this->opcode == SEL_OP_ATOMICA64        ||
>             this->opcode == SEL_OP_BYTE_SCATTER     ||
>             this->opcode == SEL_OP_BYTE_SCATTERA64  ||
>             this->opcode == SEL_OP_TYPED_WRITE;
> @@ -629,6 +631,8 @@ namespace gbe
>      void WAIT(void);
>      /*! Atomic instruction */
>      void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1,
> Reg src2, GenRegister bti, vector<GenRegister> temps);
> +    /*! AtomicA64 instruction */
> +    void ATOMICA64(Reg dst, uint32_t function, uint32_t srcNum,
> vector<GenRegister> src, GenRegister bti, vector<GenRegister> temps);
>      /*! Read 64 bits float/int array */
>      void READ64(Reg addr, const GenRegister *dst, const GenRegister *tmp,
> uint32_t elemNum, const GenRegister bti, bool native_long, vector<GenRegister>
> temps);
>      /*! Write 64 bits float/int array */
> @@ -1304,6 +1308,33 @@ namespace gbe
>      vector->isSrc = 1;
>    }
> 
> +  void Selection::Opaque::ATOMICA64(Reg dst, uint32_t function,
> +                                 uint32_t msgPayload, vector<GenRegister> src,
> +                                 GenRegister bti,
> +                                 vector<GenRegister> temps) {
> +    unsigned dstNum = 1 + temps.size();
> +    SelectionInstruction *insn = this->appendInsn(SEL_OP_ATOMICA64, dstNum,
> msgPayload + 1);
> +
> +    insn->dst(0) = dst;
> +    if(temps.size()) {
> +      insn->dst(1) = temps[0];
> +      insn->dst(2) = temps[1];
> +    }
> +
> +    for (uint32_t elemID = 0; elemID < msgPayload; ++elemID)
> +      insn->src(elemID) = src[elemID];
> +    insn->src(msgPayload) = bti;
> +
> +    insn->extra.function = function;
> +    insn->extra.elem = msgPayload;
> +
> +    SelectionVector *vector = this->appendVector();
> +    vector->regNum = msgPayload; //bti not included in SelectionVector
> +    vector->offsetID = 0;
> +    vector->reg = &insn->src(0);
> +    vector->isSrc = 1;
> +  }
> +
>    void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
>    void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
>    void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0, 0); }
> @@ -5481,34 +5512,124 @@ namespace gbe
>            this->opcodes.push_back(ir::Opcode(op));
>      }
> 
> +    /* Used to transform an address from 64bit to 32bit. Note that dataport
> +     * messages cannot accept a scalar register, so the address is converted
> +     * to a non-uniform register here. */
> +    GenRegister convertU64ToU32(Selection::Opaque &sel,
> +                                GenRegister addr) const {
> +      GenRegister unpacked = GenRegister::retype(sel.unpacked_ud(addr.reg()),
> GEN_TYPE_UD);
> +      GenRegister dst = sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32);
> +      sel.MOV(dst, unpacked);
> +      return dst;
> +    }
> +
> +    void untypedAtomicA64Stateless(Selection::Opaque &sel,
> +                              const ir::AtomicInstruction &insn,
> +                              unsigned msgPayload,
> +                              GenRegister dst,
> +                              GenRegister addr,
> +                              GenRegister src1,
> +                              GenRegister src2,
> +                              GenRegister bti) const {
> +      using namespace ir;
> +      GenRegister addrQ;
> +      const AtomicOps atomicOp = insn.getAtomicOpcode();
> +      GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp;
> +      unsigned addrBytes = typeSize(addr.type);
> +      GBE_ASSERT(msgPayload <= 3);
> +
> +      unsigned simdWidth = sel.curr.execWidth;
> +      AddressMode AM = insn.getAddressMode();
> +      if (addrBytes == 4) {
> +        addrQ = sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64);
> +        sel.MOV(addrQ, addr);
> +      } else {
> +        addrQ = addr;
> +      }
> +
> +      if (simdWidth == 8) {
> +        vector<GenRegister> msgs;
> +        msgs.push_back(addr);
> +        msgs.push_back(src1);
> +        msgs.push_back(src2);
> +        sel.ATOMICA64(dst, genAtomicOp, msgPayload, msgs, bti,
> sel.getBTITemps(AM));
> +      } else if (simdWidth == 16) {
> +        vector<GenRegister> msgs;
> +        for (unsigned k = 0; k < msgPayload; k++) {
> +          msgs.push_back(sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32));
> +        }
> +        sel.push();
> +        /* first quarter */
> +        sel.curr.execWidth = 8;
> +        sel.curr.quarterControl = GEN_COMPRESSION_Q1;
> +        sel.MOV(GenRegister::retype(msgs[0], GEN_TYPE_UL),
> GenRegister::Qn(addrQ, 0));
> +        if(msgPayload > 1)
> +          sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 0));
> +        if(msgPayload > 2)
> +          sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 0));
> +        sel.ATOMICA64(GenRegister::Qn(dst, 0), genAtomicOp, msgPayload, msgs,
> bti, sel.getBTITemps(AM));
> +
> +        /* second quarter */
> +        sel.curr.execWidth = 8;
> +        sel.curr.quarterControl = GEN_COMPRESSION_Q2;
> +        sel.MOV(GenRegister::retype(msgs[0], GEN_TYPE_UL),
> GenRegister::Qn(addrQ, 1));
> +        if(msgPayload > 1)
> +          sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 1));
> +        if(msgPayload > 2)
> +          sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 1));
> +        sel.ATOMICA64(GenRegister::Qn(dst, 1), genAtomicOp, msgPayload, msgs,
> bti, sel.getBTITemps(AM));
> +        sel.pop();
> +      }
> +    }
> +
>      INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
>        using namespace ir;
>        const ir::AtomicInstruction &insn = cast<ir::AtomicInstruction>(dag.insn);
> 
> -      ir::BTI b;
>        const AtomicOps atomicOp = insn.getAtomicOpcode();
>        unsigned srcNum = insn.getSrcNum();
>        unsigned msgPayload;
> +      Register reg = insn.getAddressRegister();
> +      GenRegister address = sel.selReg(reg, getType(sel.getRegisterFamily(reg)));
> +      AddressSpace addrSpace = insn.getAddressSpace();
> +      GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
> +                 insn.getAddressSpace() == MEM_PRIVATE ||
> +                 insn.getAddressSpace() == MEM_LOCAL ||
> +                 insn.getAddressSpace() == MEM_GENERIC ||
> +                 insn.getAddressSpace() == MEM_MIXED);
> +      unsigned addrBytes = typeSize(address.type);
> 
>        AddressMode AM = insn.getAddressMode();
>        if (AM == AM_DynamicBti) {
> -        b.reg = insn.getBtiReg();
>          msgPayload = srcNum - 1;
>        } else {
> -        b.imm = insn.getSurfaceIndex();
> -        b.isConst = 1;
>          msgPayload = srcNum;
>        }
> 
>        GenRegister dst  = sel.selReg(insn.getDst(0), TYPE_U32);
> -      GenRegister bti =  b.isConst ? GenRegister::immud(b.imm) : sel.selReg(b.reg,
> ir::TYPE_U32);
>        GenRegister src0 = sel.selReg(insn.getAddressRegister(), TYPE_U32);
>        GenRegister src1 = src0, src2 = src0;
>        if(msgPayload > 1) src1 = sel.selReg(insn.getSrc(1), TYPE_U32);
>        if(msgPayload > 2) src2 = sel.selReg(insn.getSrc(2), TYPE_U32);
> 
>        GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp;
> -      sel.ATOMIC(dst, genAtomicOp, msgPayload, src0, src1, src2, bti,
> sel.getBTITemps(AM));
> +      if (AM == AM_DynamicBti || AM == AM_StaticBti) {
> +        if (AM == AM_DynamicBti) {
> +          Register btiReg = insn.getBtiReg();
> +          sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2,
> sel.selReg(btiReg, TYPE_U32), sel.getBTITemps(AM));
> +        } else {
> +          unsigned SI = insn.getSurfaceIndex();
> +          sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2,
> GenRegister::immud(SI), sel.getBTITemps(AM));
> +        }
> +      } else if (addrSpace == ir::MEM_LOCAL) {
> +        // stateless mode, local still use bti access
> +        GenRegister addrDW = address;
> +        if (addrBytes == 8)
> +          addrDW = convertU64ToU32(sel, address);
> +        sel.ATOMIC(dst, genAtomicOp, msgPayload, addrDW, src1, src2,
> GenRegister::immud(0xfe), sel.getBTITemps(AM));
> +      }
> +      else
> +        untypedAtomicA64Stateless(sel, insn, msgPayload, dst, address, src1, src2,
> GenRegister::immud(0xff));
> 
>        markAllChildren(dag);
>        return true;
> diff --git a/backend/src/backend/gen_insn_selection.hxx
> b/backend/src/backend/gen_insn_selection.hxx
> index 1fbcb1a..f6ed284 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -40,6 +40,7 @@ DECL_SELECTION_IR(I64MUL, I64MULInstruction)
>  DECL_SELECTION_IR(I64DIV, I64DIVREMInstruction)
>  DECL_SELECTION_IR(I64REM, I64DIVREMInstruction)
>  DECL_SELECTION_IR(ATOMIC, AtomicInstruction)
> +DECL_SELECTION_IR(ATOMICA64, AtomicA64Instruction)
>  DECL_SELECTION_IR(MACH, BinaryInstruction)
>  DECL_SELECTION_IR(CMP, CompareInstruction)
>  DECL_SELECTION_IR(I64CMP, I64CompareInstruction)
> --
> 2.1.4
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list