[Beignet] [PATCH V3] GBE: Refine ir for memory operation like atomic/load/store
Yang, Rong R
rong.r.yang at intel.com
Thu Nov 5 23:01:27 PST 2015
Pushed.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Ruiling Song
> Sent: Wednesday, November 4, 2015 15:50
> To: beignet at lists.freedesktop.org
> Cc: Song, Ruiling
> Subject: [Beignet] [PATCH V3] GBE: Refine ir for memory operation like
> atomic/load/store
>
> legacyMode determines what kind of address mode to choose.
> When legacyMode is true, we need to do complex bti analysis.
>
> dynamicBti and staticBti are mostly for platforms before BDW,
> and stateless is for platforms BDW and later.
>
> v2:
> only do analyzePointerOrigin() under legacyMode.
>
> v3:
> fix conflict with master, and some reorder warning.
>
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
> backend/src/backend/gen_insn_selection.cpp | 132 ++++------
> backend/src/ir/context.hpp | 19 --
> backend/src/ir/instruction.cpp | 410 +++++++++++++++++------------
> backend/src/ir/instruction.hpp | 78 +++---
> backend/src/ir/lowering.cpp | 4 +-
> backend/src/llvm/llvm_gen_backend.cpp | 393 +++++++++++++-----------
> ---
> 6 files changed, 531 insertions(+), 505 deletions(-)
>
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index 2452aea..5ec420e 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -1254,11 +1254,11 @@ namespace gbe
> }
>
> void Selection::Opaque::ATOMIC(Reg dst, uint32_t function,
> - uint32_t srcNum, Reg src0,
> + uint32_t msgPayload, Reg src0,
> Reg src1, Reg src2, GenRegister bti,
> vector<GenRegister> temps) {
> unsigned dstNum = 1 + temps.size();
> - SelectionInstruction *insn = this->appendInsn(SEL_OP_ATOMIC, dstNum,
> srcNum + 1);
> + SelectionInstruction *insn = this->appendInsn(SEL_OP_ATOMIC, dstNum,
> msgPayload + 1);
>
> if (bti.file != GEN_IMMEDIATE_VALUE) {
> insn->state.flag = 0;
> @@ -1272,14 +1272,15 @@ namespace gbe
> }
>
> insn->src(0) = src0;
> - if(srcNum > 1) insn->src(1) = src1;
> - if(srcNum > 2) insn->src(2) = src2;
> - insn->src(srcNum) = bti;
> + if(msgPayload > 1) insn->src(1) = src1;
> + if(msgPayload > 2) insn->src(2) = src2;
> + insn->src(msgPayload) = bti;
> +
> insn->extra.function = function;
> - insn->extra.elem = srcNum;
> + insn->extra.elem = msgPayload;
>
> SelectionVector *vector = this->appendVector();
> - vector->regNum = srcNum;
> + vector->regNum = msgPayload; //bti not included in SelectionVector
> vector->offsetID = 0;
> vector->reg = &insn->src(0);
> vector->isSrc = 1;
> @@ -3424,8 +3425,6 @@ namespace gbe
> uint32_t valueNum,
> ir::BTI bti) const
> {
> - //GenRegister temp = getRelativeAddress(sel, addr, sel.selReg(bti.base,
> ir::TYPE_U32));
> -
> GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) :
> sel.selReg(bti.reg, ir::TYPE_U32);
> sel.UNTYPED_READ(addr, dst.data(), valueNum, b,
> sel.getBTITemps(bti));
> }
> @@ -3726,28 +3725,12 @@ namespace gbe
> return false;
> }
>
> - INLINE ir::BTI getBTI(SelectionDAG &dag, const ir::LoadInstruction &insn)
> const {
> - using namespace ir;
> - SelectionDAG *child0 = dag.child[0];
> - ir::BTI b;
> - if (insn.isFixedBTI()) {
> - const auto &immInsn = cast<LoadImmInstruction>(child0->insn);
> - const auto imm = immInsn.getImmediate();
> - b.isConst = 1;
> - b.imm = imm.getIntegerValue();
> - } else {
> - b.isConst = 0;
> - b.reg = insn.getBTI();
> - }
> - return b;
> - }
> -
> /*! Implements base class */
> virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const
> {
> using namespace ir;
> const ir::LoadInstruction &insn = cast<ir::LoadInstruction>(dag.insn);
> - GenRegister address = sel.selReg(insn.getAddress(), ir::TYPE_U32);
> + GenRegister address = sel.selReg(insn.getAddressRegister(),
> ir::TYPE_U32);
> GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
> insn.getAddressSpace() == MEM_CONSTANT ||
> insn.getAddressSpace() == MEM_PRIVATE ||
> @@ -3755,8 +3738,17 @@ namespace gbe
> insn.getAddressSpace() == MEM_MIXED);
> //GBE_ASSERT(sel.isScalarReg(insn.getValue(0)) == false);
>
> - BTI bti = getBTI(dag, insn);
> -
> + BTI bti;
> + AddressMode am = insn.getAddressMode();
> + if (am == AM_StaticBti) {
> + bti.isConst = 1;
> + bti.imm = insn.getSurfaceIndex();
> + } else if (am == AM_DynamicBti) {
> + bti.isConst = 0;
> + bti.reg = insn.getBtiReg();
> + } else {
> + assert(0 && "stateless not supported yet");
> + }
> const Type type = insn.getValueType();
> const uint32_t elemSize = getByteScatterGatherSize(sel, type);
> bool allConstant = isAllConstant(bti);
> @@ -3784,12 +3776,7 @@ namespace gbe
> this->emitUnalignedByteGather(sel, insn, elemSize, address, bti);
> }
>
> -
> - // for fixed bti, don't generate the useless loadi
> - if (insn.isFixedBTI())
> - dag.child[0] = NULL;
> markAllChildren(dag);
> -
> return true;
> }
> };
> @@ -3893,32 +3880,26 @@ namespace gbe
> }
> }
>
> -
> - INLINE ir::BTI getBTI(SelectionDAG &dag, const ir::StoreInstruction &insn)
> const {
> - using namespace ir;
> - SelectionDAG *child0 = dag.child[0];
> - ir::BTI b;
> - if (insn.isFixedBTI()) {
> - const auto &immInsn = cast<LoadImmInstruction>(child0->insn);
> - const auto imm = immInsn.getImmediate();
> - b.isConst = 1;
> - b.imm = imm.getIntegerValue();
> - } else {
> - b.isConst = 0;
> - b.reg = insn.getBTI();
> - }
> - return b;
> - }
> virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const
> {
> using namespace ir;
> const ir::StoreInstruction &insn = cast<ir::StoreInstruction>(dag.insn);
> - GenRegister address = sel.selReg(insn.getAddress(), ir::TYPE_U32);
> + GenRegister address = sel.selReg(insn.getAddressRegister(),
> ir::TYPE_U32);
> const Type type = insn.getValueType();
> const uint32_t elemSize = getByteScatterGatherSize(sel, type);
>
> - const bool isUniform = sel.isScalarReg(insn.getAddress()) &&
> sel.isScalarReg(insn.getValue(0));
> - BTI bti = getBTI(dag, insn);
> + const bool isUniform = sel.isScalarReg(insn.getAddressRegister()) &&
> sel.isScalarReg(insn.getValue(0));
> + BTI bti;
> + AddressMode am = insn.getAddressMode();
> + if (am == AM_StaticBti) {
> + bti.isConst = 1;
> + bti.imm = insn.getSurfaceIndex();
> + } else if (am == AM_DynamicBti) {
> + bti.isConst = 0;
> + bti.reg = insn.getBtiReg();
> + } else {
> + assert(0 && "stateless not supported yet");
> + }
>
> if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
> this->emitWrite64(sel, insn, address, bti);
> @@ -3928,11 +3909,7 @@ namespace gbe
> this->emitByteScatter(sel, insn, elemSize, address, bti, isUniform);
> }
>
> - // for fixed bti, don't generate the useless loadi
> - if (insn.isFixedBTI())
> - dag.child[0] = NULL;
> markAllChildren(dag);
> -
> return true;
> }
> };
> @@ -4795,47 +4772,36 @@ namespace gbe
> this->opcodes.push_back(ir::Opcode(op));
> }
>
> - INLINE ir::BTI getBTI(SelectionDAG &dag, const ir::AtomicInstruction &insn)
> const {
> - using namespace ir;
> - SelectionDAG *child0 = dag.child[0];
> - ir::BTI b;
> - if (insn.isFixedBTI()) {
> - const auto &immInsn = cast<LoadImmInstruction>(child0->insn);
> - const auto imm = immInsn.getImmediate();
> - b.isConst = 1;
> - b.imm = imm.getIntegerValue();
> - } else {
> - b.isConst = 0;
> - b.reg = insn.getBTI();
> - }
> - return b;
> - }
> -
> INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
> using namespace ir;
> const ir::AtomicInstruction &insn = cast<ir::AtomicInstruction>(dag.insn);
>
> - ir::BTI b = getBTI(dag, insn);
> + ir::BTI b;
> const AtomicOps atomicOp = insn.getAtomicOpcode();
> unsigned srcNum = insn.getSrcNum();
> - unsigned opNum = srcNum - 1;
> + unsigned msgPayload;
> +
> + AddressMode AM = insn.getAddressMode();
> + if (AM == AM_DynamicBti) {
> + b.reg = insn.getBtiReg();
> + msgPayload = srcNum - 1;
> + } else {
> + b.imm = insn.getSurfaceIndex();
> + b.isConst = 1;
> + msgPayload = srcNum;
> + }
>
> GenRegister dst = sel.selReg(insn.getDst(0), TYPE_U32);
> GenRegister bti = b.isConst ? GenRegister::immud(b.imm) :
> sel.selReg(b.reg, ir::TYPE_U32);
> - GenRegister src0 = sel.selReg(insn.getSrc(1), TYPE_U32); //address
> + GenRegister src0 = sel.selReg(insn.getAddressRegister(), TYPE_U32);
> GenRegister src1 = src0, src2 = src0;
> - if(srcNum > 2) src1 = sel.selReg(insn.getSrc(2), TYPE_U32);
> - if(srcNum > 3) src2 = sel.selReg(insn.getSrc(3), TYPE_U32);
> + if(msgPayload > 1) src1 = sel.selReg(insn.getSrc(1), TYPE_U32);
> + if(msgPayload > 2) src2 = sel.selReg(insn.getSrc(2), TYPE_U32);
>
> GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp;
> + sel.ATOMIC(dst, genAtomicOp, msgPayload, src0, src1, src2, bti,
> sel.getBTITemps(b));
>
> - sel.ATOMIC(dst, genAtomicOp, opNum, src0, src1, src2, bti,
> sel.getBTITemps(b));
> -
> - // for fixed bti, don't generate the useless loadi
> - if (insn.isFixedBTI())
> - dag.child[0] = NULL;
> markAllChildren(dag);
> -
> return true;
> }
> };
> diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp
> index 0f7ded4..ab0d8b5 100644
> --- a/backend/src/ir/context.hpp
> +++ b/backend/src/ir/context.hpp
> @@ -189,25 +189,6 @@ namespace ir {
> this->append(insn);
> }
>
> - /*! LOAD with the destinations directly specified */
> - template <typename... Args>
> - void LOAD(Type type, Register offset, AddressSpace space, bool
> dwAligned, bool fixedBTI, Register bti, Args...values)
> - {
> - const Tuple index = this->tuple(values...);
> - const uint16_t valueNum = std::tuple_size<std::tuple<Args...>>::value;
> - GBE_ASSERT(valueNum > 0);
> - this->LOAD(type, index, offset, space, valueNum, dwAligned, fixedBTI,
> bti);
> - }
> -
> - /*! STORE with the sources directly specified */
> - template <typename... Args>
> - void STORE(Type type, Register offset, AddressSpace space, bool
> dwAligned, bool fixedBTI, Register bti, Args...values)
> - {
> - const Tuple index = this->tuple(values...);
> - const uint16_t valueNum = std::tuple_size<std::tuple<Args...>>::value;
> - GBE_ASSERT(valueNum > 0);
> - this->STORE(type, index, offset, space, valueNum, dwAligned, fixedBTI,
> bti);
> - }
> void appendSurface(uint8_t bti, Register reg) { fn->appendSurface(bti,
> reg); }
>
> protected:
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index f93c528..0246920 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -316,60 +316,104 @@ namespace ir {
> Type srcType; //!< Type to convert from
> };
>
> + class ALIGNED_INSTRUCTION MemInstruction :
> + public BasePolicy
> + {
> + public:
> + MemInstruction(AddressMode _AM,
> + AddressSpace _AS,
> + bool _dwAligned,
> + Type _type,
> + Register _offset)
> + : AM(_AM),
> + AS(_AS),
> + dwAligned(_dwAligned),
> + type(_type),
> + SurfaceIndex(0),
> + offset(_offset) {
> + }
> + AddressMode getAddressMode() const { return AM; }
> + AddressSpace getAddressSpace() const { return AS; }
> + /*! MemInstruction may have one possible btiReg */
> + Register getBtiReg() const { assert(AM == AM_DynamicBti); return
> BtiReg; }
> + unsigned getSurfaceIndex() const { assert(AM != AM_DynamicBti);
> return SurfaceIndex; }
> + Register getAddressRegister()const { return offset; }
> + unsigned getAddressIndex() const { return 0; }
> + Type getValueType() const { return type; }
> + INLINE bool isAligned(void) const { return !!dwAligned; }
> +
> + void setSurfaceIndex (unsigned id) { SurfaceIndex = id; }
> + void setBtiReg(Register reg) { BtiReg = reg; }
> + protected:
> + /*! including address reg + optional bti reg */
> + int getBaseSrcNum() const { return AM == AM_DynamicBti ? 2 : 1; }
> + bool hasExtraBtiReg() const { return AM == AM_DynamicBti; }
> + AddressMode AM;
> + AddressSpace AS;
> + uint8_t dwAligned : 1;
> + Type type;
> + union {
> + Register BtiReg;
> + unsigned SurfaceIndex;
> + };
> + Register offset;
> + };
> +
> class ALIGNED_INSTRUCTION AtomicInstruction :
> - public BasePolicy,
> + public MemInstruction,
> public NDstPolicy<AtomicInstruction, 1>
> {
> public:
> AtomicInstruction(AtomicOps atomicOp,
> + Type type,
> Register dst,
> AddressSpace addrSpace,
> - Register bti,
> - bool fixedBTI,
> - Tuple src)
> + Register address,
> + Tuple payload,
> + AddressMode AM)
> + : MemInstruction(AM, addrSpace, true, type, address)
> {
> this->opcode = OP_ATOMIC;
> this->atomicOp = atomicOp;
> this->dst[0] = dst;
> - this->src = src;
> - this->addrSpace = addrSpace;
> - this->bti = bti;
> - this->fixedBTI = fixedBTI ? 1: 0;
> - srcNum = 2;
> + this->payload = payload;
> +
> + int payloadNum = 1;
> if((atomicOp == ATOMIC_OP_INC) ||
> (atomicOp == ATOMIC_OP_DEC))
> - srcNum = 1;
> + payloadNum = 0;
> if(atomicOp == ATOMIC_OP_CMPXCHG)
> - srcNum = 3;
> - srcNum++;
> + payloadNum = 2;
> +
> + srcNum = payloadNum + getBaseSrcNum();
> }
> INLINE Register getSrc(const Function &fn, uint32_t ID) const {
> - GBE_ASSERTM(ID < srcNum, "Out-of-bound source register for atomic");
> - if (ID == 0u)
> - return bti;
> - else
> - return fn.getRegister(src, ID -1);
> + GBE_ASSERTM((int)ID < (int)srcNum, "Out-of-bound source register for
> atomic");
> + if (ID == 0) {
> + return offset;
> + } else if (hasExtraBtiReg() && (int)ID == (int)srcNum-1) {
> + return getBtiReg();
> + } else {
> + return fn.getRegister(payload, ID - 1);
> + }
> }
> INLINE void setSrc(Function &fn, uint32_t ID, Register reg) {
> - GBE_ASSERTM(ID < srcNum, "Out-of-bound source register for atomic");
> - if (ID == 0u)
> - bti = reg;
> - else
> - fn.setRegister(src, ID - 1, reg);
> + GBE_ASSERTM((int)ID < (int)srcNum, "Out-of-bound source register for
> atomic");
> + if (ID == 0) {
> + offset = reg;
> + } else if (hasExtraBtiReg() && (int)ID == (int)srcNum - 1) {
> + setBtiReg(reg);
> + } else {
> + fn.setRegister(payload, ID - 1, reg);
> + }
> }
> INLINE uint32_t getSrcNum(void) const { return srcNum; }
>
> - INLINE AddressSpace getAddressSpace(void) const { return this-
> >addrSpace; }
> - INLINE Register getBTI(void) const { return bti; }
> - INLINE bool isFixedBTI(void) const { return !!fixedBTI; }
> INLINE AtomicOps getAtomicOpcode(void) const { return this-
> >atomicOp; }
> INLINE bool wellFormed(const Function &fn, std::string &whyNot) const;
> INLINE void out(std::ostream &out, const Function &fn) const;
> Register dst[1];
> - Tuple src;
> - AddressSpace addrSpace; //!< Address space
> - Register bti; //!< bti
> - uint8_t fixedBTI:1; //!< fixed bti or not
> + Tuple payload;
> uint8_t srcNum:3; //!<Source Number
> AtomicOps atomicOp:6; //!<Source Number
> };
> @@ -428,119 +472,102 @@ namespace ir {
> Register dst[0]; //!< No destination
> };
>
> +
> class ALIGNED_INSTRUCTION LoadInstruction :
> - public BasePolicy,
> - public NSrcPolicy<LoadInstruction, 2>
> + public MemInstruction
> {
> - public:
> - LoadInstruction(Type type,
> - Tuple dstValues,
> - Register offset,
> - AddressSpace addrSpace,
> - uint32_t valueNum,
> - bool dwAligned,
> - bool fixedBTI,
> - Register bti)
> - {
> - GBE_ASSERT(valueNum < 128);
> - this->opcode = OP_LOAD;
> - this->type = type;
> - this->offset = offset;
> - this->values = dstValues;
> - this->addrSpace = addrSpace;
> - this->valueNum = valueNum;
> - this->dwAligned = dwAligned ? 1 : 0;
> - this->fixedBTI = fixedBTI ? 1 : 0;
> - this->bti = bti;
> - }
> - INLINE Register getDst(const Function &fn, uint32_t ID) const {
> - GBE_ASSERTM(ID < valueNum, "Out-of-bound source register");
> - return fn.getRegister(values, ID);
> - }
> - INLINE void setDst(Function &fn, uint32_t ID, Register reg) {
> - GBE_ASSERTM(ID < valueNum, "Out-of-bound source register");
> - fn.setRegister(values, ID, reg);
> - }
> - INLINE uint32_t getDstNum(void) const { return valueNum; }
> - INLINE Type getValueType(void) const { return type; }
> - INLINE uint32_t getValueNum(void) const { return valueNum; }
> - INLINE AddressSpace getAddressSpace(void) const { return addrSpace; }
> - INLINE Register getBTI(void) const { return bti; }
> - INLINE bool wellFormed(const Function &fn, std::string &why) const;
> - INLINE void out(std::ostream &out, const Function &fn) const;
> - INLINE bool isAligned(void) const { return !!dwAligned; }
> - INLINE bool isFixedBTI(void) const { return !!fixedBTI; }
> - Type type; //!< Type to store
> - Register src[0]; //!< Address where to load from
> - Register bti;
> - Register offset; //!< Alias to make it similar to store
> - Tuple values; //!< Values to load
> - AddressSpace addrSpace; //!< Where to load
> - uint8_t fixedBTI:1;
> - uint8_t valueNum:7; //!< Number of values to load
> - uint8_t dwAligned:1; //!< DWORD aligned is what matters with GEN
> - };
> + public:
> + LoadInstruction(Type type,
> + Tuple dstValues,
> + Register offset,
> + AddressSpace AS,
> + uint32_t _valueNum,
> + bool dwAligned,
> + AddressMode AM)
> + : MemInstruction(AM, AS, dwAligned, type, offset),
> + valueNum(_valueNum),
> + values(dstValues)
> + {
> + this->opcode = OP_LOAD;
> + }
>
> + INLINE unsigned getSrcNum() const { return getBaseSrcNum(); }
> + INLINE Register getSrc(const Function &fn, unsigned id) const {
> + if (id == 0) return offset;
> + if (hasExtraBtiReg() && id == 1) return BtiReg;
> + assert(0 && "LoadInstruction::getSrc() out-of-range");
> + return ir::Register(0);
> + }
> + INLINE void setSrc(Function &fn, unsigned id, Register reg) {
> + assert(id < getSrcNum());
> + if (id == 0) { offset = reg; return; }
> + if (id == 1) { setBtiReg(reg); return; }
> + }
> + INLINE unsigned getDstNum() const { return valueNum; }
> + INLINE Register getDst(const Function &fn, unsigned id) const {
> + assert(id < valueNum);
> + return fn.getRegister(values, id);
> + }
> + INLINE void setDst(Function &fn, unsigned id, Register reg) {
> + assert(id < getDstNum());
> + fn.setRegister(values, id, reg);
> + }
> + INLINE uint32_t getValueNum(void) const { return valueNum; }
> + INLINE Register getValue(const Function &fn, unsigned id) const {
> + assert(id < valueNum);
> + return fn.getRegister(values, id);
> + }
> + INLINE bool wellFormed(const Function &fn, std::string &why) const;
> + INLINE void out(std::ostream &out, const Function &fn) const;
> +
> + uint8_t valueNum;
> + Tuple values;
> + };
> class ALIGNED_INSTRUCTION StoreInstruction :
> - public BasePolicy, public NDstPolicy<StoreInstruction, 0>
> + public MemInstruction,
> + public NDstPolicy<StoreInstruction, 0>
> {
> - public:
> - StoreInstruction(Type type,
> - Tuple values,
> - Register offset,
> - AddressSpace addrSpace,
> - uint32_t valueNum,
> - bool dwAligned,
> - bool fixedBTI,
> - Register bti)
> - {
> - GBE_ASSERT(valueNum < 255);
> - this->opcode = OP_STORE;
> - this->type = type;
> - this->offset = offset;
> - this->values = values;
> - this->addrSpace = addrSpace;
> - this->valueNum = valueNum;
> - this->dwAligned = dwAligned ? 1 : 0;
> - this->fixedBTI = fixedBTI ? 1 : 0;
> - this->bti = bti;
> - }
> - INLINE Register getSrc(const Function &fn, uint32_t ID) const {
> - GBE_ASSERTM(ID < valueNum + 2u, "Out-of-bound source register for
> store");
> - if (ID == 0u)
> - return bti;
> - else if (ID == 1u)
> - return offset;
> - else
> - return fn.getRegister(values, ID - 2);
> - }
> - INLINE void setSrc(Function &fn, uint32_t ID, Register reg) {
> - GBE_ASSERTM(ID < valueNum + 2u, "Out-of-bound source register for
> store");
> - if (ID == 0u)
> - bti = reg;
> - else if (ID == 1u)
> - offset = reg;
> - else
> - fn.setRegister(values, ID - 2, reg);
> - }
> - INLINE uint32_t getSrcNum(void) const { return valueNum + 2u; }
> - INLINE uint32_t getValueNum(void) const { return valueNum; }
> - INLINE Type getValueType(void) const { return type; }
> - INLINE AddressSpace getAddressSpace(void) const { return addrSpace; }
> - INLINE Register getBTI(void) const { return bti; }
> - INLINE bool wellFormed(const Function &fn, std::string &why) const;
> - INLINE void out(std::ostream &out, const Function &fn) const;
> - INLINE bool isAligned(void) const { return !!dwAligned; }
> - INLINE bool isFixedBTI(void) const { return !!fixedBTI; }
> - Type type; //!< Type to store
> - Register bti;
> - Register offset; //!< First source is the offset where to store
> - Tuple values; //!< Values to store
> - AddressSpace addrSpace; //!< Where to store
> - uint8_t fixedBTI:1; //!< Which btis need access
> - uint8_t valueNum:7; //!< Number of values to store
> - uint8_t dwAligned:1; //!< DWORD aligned is what matters with GEN
> - Register dst[0]; //!< No destination
> + public:
> + StoreInstruction(Type type,
> + Tuple values,
> + Register offset,
> + AddressSpace addrSpace,
> + uint32_t valueNum,
> + bool dwAligned,
> + AddressMode AM)
> + : MemInstruction(AM, addrSpace, dwAligned, type, offset)
> + {
> + this->opcode = OP_STORE;
> + this->values = values;
> + this->valueNum = valueNum;
> + }
> + INLINE unsigned getValueNum() const { return valueNum; }
> + INLINE Register getValue(const Function &fn, unsigned id) const {
> + return fn.getRegister(values, id);
> + }
> + INLINE unsigned getSrcNum() const { return getBaseSrcNum() +
> valueNum; }
> + INLINE Register getSrc(const Function &fn, unsigned id) const {
> + if (id == 0) return offset;
> + if (id <= valueNum) return fn.getRegister(values, id-1);
> + if (hasExtraBtiReg() && (int)id == (int)valueNum+1) return getBtiReg();
> + assert(0 && "StoreInstruction::getSrc() out-of-range");
> + return Register(0);
> + }
> + INLINE void setSrc(Function &fn, unsigned id, Register reg) {
> + if (id == 0) { offset = reg; return; }
> + if (id > 0 && id <= valueNum) { fn.setRegister(values, id-1, reg);
> return; }
> + if (hasExtraBtiReg() &&
> + (int)id == (int)valueNum + 1) {
> + setBtiReg(reg);
> + return;
> + }
> + assert(0 && "StoreInstruction::setSrc() index out-of-range");
> + }
> + INLINE bool wellFormed(const Function &fn, std::string &why) const;
> + INLINE void out(std::ostream &out, const Function &fn) const;
> + Register dst[0];
> + uint8_t valueNum;
> + Tuple values;
> };
>
> class ALIGNED_INSTRUCTION SampleInstruction : // TODO
> @@ -1037,8 +1064,6 @@ namespace ir {
> if (UNLIKELY(checkRegisterData(FAMILY_DWORD, getSrc(fn, srcID+1u),
> fn, whyNot) == false))
> return false;
>
> - if (UNLIKELY(checkRegisterData(FAMILY_DWORD, bti, fn, whyNot) ==
> false))
> - return false;
> return true;
> }
>
> @@ -1065,7 +1090,7 @@ namespace ir {
> template <typename T>
> INLINE bool wellFormedLoadStore(const T &insn, const Function &fn,
> std::string &whyNot)
> {
> - if (UNLIKELY(insn.offset >= fn.regNum())) {
> + if (UNLIKELY(insn.getAddressRegister() >= fn.regNum())) {
> whyNot = "Out-of-bound offset register index";
> return false;
> }
> @@ -1073,10 +1098,11 @@ namespace ir {
> whyNot = "Out-of-bound tuple index";
> return false;
> }
> +
> // Check all registers
> - const RegisterFamily family = getFamily(insn.type);
> - for (uint32_t valueID = 0; valueID < insn.valueNum; ++valueID) {
> - const Register regID = fn.getRegister(insn.values, valueID);
> + const RegisterFamily family = getFamily(insn.getValueType());
> + for (uint32_t valueID = 0; valueID < insn.getValueNum(); ++valueID) {
> + const Register regID = insn.getValue(fn, valueID);;
> if (UNLIKELY(checkRegisterData(family, regID, fn, whyNot) == false))
> return false;
> }
> @@ -1260,12 +1286,18 @@ namespace ir {
>
> INLINE void AtomicInstruction::out(std::ostream &out, const Function &fn)
> const {
> this->outOpcode(out);
> - out << "." << addrSpace;
> + out << "." << AS;
> out << " %" << this->getDst(fn, 0);
> - out << " {" << "%" << this->getSrc(fn, 1) << "}";
> - for (uint32_t i = 2; i < srcNum; ++i)
> + out << " {" << "%" << this->getSrc(fn, 0) << "}";
> + for (uint32_t i = 1; i < srcNum; ++i)
> out << " %" << this->getSrc(fn, i);
> - out << (fixedBTI ? " bti" : " bti(mixed)") << " %" << this->getBTI();
> + AddressMode am = this->getAddressMode();
> + out << " bti:";
> + if ( am == AM_DynamicBti) {
> + out << " %" << this->getBtiReg();
> + } else {
> + out << this->getSurfaceIndex();
> + }
> }
>
>
> @@ -1294,23 +1326,35 @@ namespace ir {
>
> INLINE void LoadInstruction::out(std::ostream &out, const Function &fn)
> const {
> this->outOpcode(out);
> - out << "." << type << "." << addrSpace << (dwAligned ? "." : ".un") <<
> "aligned";
> + out << "." << type << "." << AS << (dwAligned ? "." : ".un") << "aligned";
> out << " {";
> for (uint32_t i = 0; i < valueNum; ++i)
> out << "%" << this->getDst(fn, i) << (i != (valueNum-1u) ? " " : "");
> out << "}";
> - out << " %" << this->getSrc(fn, 1);
> - out << (fixedBTI ? " bti" : " bti(mixed)") << " %" << this->getBTI();
> + out << " %" << this->getSrc(fn, 0);
> + AddressMode am = this->getAddressMode();
> + out << " bti:";
> + if ( am == AM_DynamicBti) {
> + out << " %" << this->getBtiReg();
> + } else {
> + out << this->getSurfaceIndex();
> + }
> }
>
> INLINE void StoreInstruction::out(std::ostream &out, const Function &fn)
> const {
> this->outOpcode(out);
> - out << "." << type << "." << addrSpace << (dwAligned ? "." : ".un") <<
> "aligned";
> - out << " %" << this->getSrc(fn, 1) << " {";
> + out << "." << type << "." << AS << (dwAligned ? "." : ".un") << "aligned";
> + out << " %" << this->getSrc(fn, 0) << " {";
> for (uint32_t i = 0; i < valueNum; ++i)
> - out << "%" << this->getSrc(fn, i+2) << (i != (valueNum-1u) ? " " : "");
> + out << "%" << this->getSrc(fn, i+1) << (i != (valueNum-1u) ? " " : "");
> out << "}";
> - out << (fixedBTI ? " bti" : " bti(mixed)") << " %" << this->getBTI();
> + AddressMode am = this->getAddressMode();
> + out << " bti:";
> + if ( am == AM_DynamicBti) {
> + out << " %" << this->getBtiReg();
> + } else {
> + out << this->getSurfaceIndex();
> + }
> }
>
> INLINE void ReadARFInstruction::out(std::ostream &out, const Function
> &fn) const {
> @@ -1664,19 +1708,17 @@ DECL_MEM_FN(BitCastInstruction, Type,
> getSrcType(void), getSrcType())
> DECL_MEM_FN(BitCastInstruction, Type, getDstType(void), getDstType())
> DECL_MEM_FN(ConvertInstruction, Type, getSrcType(void), getSrcType())
> DECL_MEM_FN(ConvertInstruction, Type, getDstType(void), getDstType())
> -DECL_MEM_FN(AtomicInstruction, AddressSpace, getAddressSpace(void),
> getAddressSpace())
> +DECL_MEM_FN(MemInstruction, AddressSpace, getAddressSpace(void),
> getAddressSpace())
> +DECL_MEM_FN(MemInstruction, AddressMode, getAddressMode(void),
> getAddressMode())
> +DECL_MEM_FN(MemInstruction, Register, getAddressRegister(void),
> getAddressRegister())
> +DECL_MEM_FN(MemInstruction, Register, getBtiReg(void), getBtiReg())
> +DECL_MEM_FN(MemInstruction, unsigned, getSurfaceIndex(void),
> getSurfaceIndex())
> +DECL_MEM_FN(MemInstruction, Type, getValueType(void),
> getValueType())
> +DECL_MEM_FN(MemInstruction, bool, isAligned(void), isAligned())
> +DECL_MEM_FN(MemInstruction, unsigned, getAddressIndex(void),
> getAddressIndex())
> DECL_MEM_FN(AtomicInstruction, AtomicOps, getAtomicOpcode(void),
> getAtomicOpcode())
> -DECL_MEM_FN(AtomicInstruction, bool, isFixedBTI(void), isFixedBTI())
> -DECL_MEM_FN(StoreInstruction, Type, getValueType(void),
> getValueType())
> DECL_MEM_FN(StoreInstruction, uint32_t, getValueNum(void),
> getValueNum())
> -DECL_MEM_FN(StoreInstruction, AddressSpace, getAddressSpace(void),
> getAddressSpace())
> -DECL_MEM_FN(StoreInstruction, bool, isAligned(void), isAligned())
> -DECL_MEM_FN(StoreInstruction, bool, isFixedBTI(void), isFixedBTI())
> -DECL_MEM_FN(LoadInstruction, Type, getValueType(void), getValueType())
> DECL_MEM_FN(LoadInstruction, uint32_t, getValueNum(void),
> getValueNum())
> -DECL_MEM_FN(LoadInstruction, AddressSpace, getAddressSpace(void),
> getAddressSpace())
> -DECL_MEM_FN(LoadInstruction, bool, isAligned(void), isAligned())
> -DECL_MEM_FN(LoadInstruction, bool, isFixedBTI(void), isFixedBTI())
> DECL_MEM_FN(LoadImmInstruction, Type, getType(void), getType())
> DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void),
> getLabelIndex())
> DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated())
> @@ -1702,6 +1744,15 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t,
> getImageIndex(void), getImageIndex
>
> #undef DECL_MEM_FN
>
> +#define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \
> + RET CLASS::PROTOTYPE { \
> + return reinterpret_cast<internal::CLASS*>(this)->CALL; \
> + }
> +DECL_MEM_FN(MemInstruction, void, setSurfaceIndex(unsigned id),
> setSurfaceIndex(id))
> +DECL_MEM_FN(MemInstruction, void, setBtiReg(Register reg),
> setBtiReg(reg))
> +
> +#undef DECL_MEM_FN
> +
> Immediate LoadImmInstruction::getImmediate(void) const {
> const Function &fn = this->getFunction();
> return reinterpret_cast<const internal::LoadImmInstruction*>(this)-
> >getImmediate(fn);
> @@ -1843,8 +1894,16 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t,
> getImageIndex(void), getImageIndex
> }
>
> // For all unary functions with given opcode
> - Instruction ATOMIC(AtomicOps atomicOp, Register dst, AddressSpace
> space, Register bti, bool fixedBTI, Tuple src) {
> - return internal::AtomicInstruction(atomicOp, dst, space, bti, fixedBTI,
> src).convert();
> + Instruction ATOMIC(AtomicOps atomicOp, Type type, Register dst,
> AddressSpace space, Register address, Tuple payload, AddressMode AM,
> Register bti) {
> + internal::AtomicInstruction insn = internal::AtomicInstruction(atomicOp,
> type, dst, space, address, payload, AM);
> + insn.setBtiReg(bti);
> + return insn.convert();
> + }
> +
> + Instruction ATOMIC(AtomicOps atomicOp, Type type, Register dst,
> AddressSpace space, Register address, Tuple payload, AddressMode AM,
> unsigned SurfaceIndex) {
> + internal::AtomicInstruction insn = internal::AtomicInstruction(atomicOp,
> type, dst, space, address, payload, AM);
> + insn.setSurfaceIndex(SurfaceIndex);
> + return insn.convert();
> }
>
> // BRA
> @@ -1892,10 +1951,25 @@ DECL_MEM_FN(GetImageInfoInstruction,
> uint8_t, getImageIndex(void), getImageIndex
> AddressSpace space, \
> uint32_t valueNum, \
> bool dwAligned, \
> - bool fixedBTI, \
> + AddressMode AM, \
> + unsigned SurfaceIndex) \
> + { \
> + internal::CLASS insn =
> internal::CLASS(type,tuple,offset,space,valueNum,dwAligned,AM); \
> + insn.setSurfaceIndex(SurfaceIndex);\
> + return insn.convert(); \
> + } \
> + Instruction NAME(Type type, \
> + Tuple tuple, \
> + Register offset, \
> + AddressSpace space, \
> + uint32_t valueNum, \
> + bool dwAligned, \
> + AddressMode AM, \
> Register bti) \
> { \
> - return
> internal::CLASS(type,tuple,offset,space,valueNum,dwAligned,fixedBTI,bti).c
> onvert(); \
> + internal::CLASS insn =
> internal::CLASS(type,tuple,offset,space,valueNum,dwAligned,AM); \
> + insn.setBtiReg(bti); \
> + return insn.convert(); \
> }
>
> DECL_EMIT_FUNCTION(LOAD, LoadInstruction)
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index 3f3c655..b8f95ba 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -65,6 +65,13 @@ namespace ir {
> MEM_INVALID
> };
>
> + enum AddressMode : uint8_t {
> + AM_DynamicBti = 0,
> + AM_Stateless,
> + AM_StaticBti,
> + AM_INVALID
> + };
> +
> enum AtomicOps {
> ATOMIC_OP_AND = 1,
> ATOMIC_OP_OR = 2,
> @@ -288,20 +295,30 @@ namespace ir {
> static bool isClassOf(const Instruction &insn);
> };
>
> + class MemInstruction : public Instruction {
> + public:
> + unsigned getSurfaceIndex() const;
> + unsigned getAddressIndex() const;
> + /*! Address space that is manipulated here */
> + AddressMode getAddressMode() const;
> + Register getBtiReg() const;
> + /*! Return the register that contains the addresses */
> + Register getAddressRegister() const;
> + AddressSpace getAddressSpace() const;
> + /*! Return the types of the values */
> + Type getValueType() const;
> + bool isAligned(void) const;
> + void setBtiReg(Register reg);
> + void setSurfaceIndex(unsigned idx);
> + };
> +
> /*! Atomic instruction */
> - class AtomicInstruction : public Instruction {
> + class AtomicInstruction : public MemInstruction {
> public:
> /*! Where the address register goes */
> - static const uint32_t btiIndex = 0;
> - static const uint32_t addressIndex = 1;
> - /*! Address space that is manipulated here */
> - AddressSpace getAddressSpace(void) const;
> - Register getBTI(void) const { return this->getSrc(btiIndex); }
> - bool isFixedBTI(void) const;
> + static const uint32_t addressIndex = 0;
> /*! Return the atomic function code */
> AtomicOps getAtomicOpcode(void) const;
> - /*! Return the register that contains the addresses */
> - INLINE Register getAddress(void) const { return this-
> >getSrc(addressIndex); }
> /*! Return true if the given instruction is an instance of this class */
> static bool isClassOf(const Instruction &insn);
> };
> @@ -309,27 +326,15 @@ namespace ir {
> /*! Store instruction. First source is the address. Next sources are the
> * values to store contiguously at the given address
> */
> - class StoreInstruction : public Instruction {
> + class StoreInstruction : public MemInstruction {
> public:
> /*! Where the address register goes */
> - static const uint32_t btiIndex = 0;
> - static const uint32_t addressIndex = 1;
> - /*! Return the types of the values to store */
> - Type getValueType(void) const;
> - /*! Give the number of values the instruction is storing (srcNum-1) */
> + static const uint32_t addressIndex = 0;
> uint32_t getValueNum(void) const;
> - Register getBTI(void) const { return this->getSrc(btiIndex); }
> - bool isFixedBTI(void) const;
> - /*! Address space that is manipulated here */
> - AddressSpace getAddressSpace(void) const;
> - /*! DWORD aligned means untyped read for Gen. That is what matters */
> - bool isAligned(void) const;
> - /*! Return the register that contains the addresses */
> - INLINE Register getAddress(void) const { return this-
> >getSrc(addressIndex); }
> /*! Return the register that contain value valueID */
> INLINE Register getValue(uint32_t valueID) const {
> GBE_ASSERT(valueID < this->getValueNum());
> - return this->getSrc(valueID + 2u);
> + return this->getSrc(valueID + 1u);
> }
> /*! Return true if the given instruction is an instance of this class */
> static bool isClassOf(const Instruction &insn);
> @@ -339,20 +344,10 @@ namespace ir {
> * The multiple destinations are the contiguous values loaded at the given
> * address
> */
> - class LoadInstruction : public Instruction {
> + class LoadInstruction : public MemInstruction {
> public:
> - /*! Type of the loaded values (ie type of all the destinations) */
> - Type getValueType(void) const;
> /*! Number of values loaded (ie number of destinations) */
> uint32_t getValueNum(void) const;
> - /*! Address space that is manipulated here */
> - AddressSpace getAddressSpace(void) const;
> - /*! DWORD aligned means untyped read for Gen. That is what matters */
> - bool isAligned(void) const;
> - /*! Return the register that contains the addresses */
> - INLINE Register getAddress(void) const { return this->getSrc(1u); }
> - Register getBTI(void) const {return this->getSrc(0u);}
> - bool isFixedBTI(void) const;
> /*! Return the register that contain value valueID */
> INLINE Register getValue(uint32_t valueID) const {
> return this->getDst(valueID);
> @@ -725,7 +720,8 @@ namespace ir {
> /*! F32TO16.{dstType <- srcType} dst src */
> Instruction F32TO16(Type dstType, Type srcType, Register dst, Register src);
> /*! atomic dst addr.space {src1 {src2}} */
> - Instruction ATOMIC(AtomicOps opcode, Register dst, AddressSpace space,
> Register bti, bool fixedBTI, Tuple src);
> + Instruction ATOMIC(AtomicOps opcode, Type, Register dst, AddressSpace
> space, Register ptr, Tuple payload, AddressMode, unsigned);
> + Instruction ATOMIC(AtomicOps opcode, Type, Register dst, AddressSpace
> space, Register ptr, Tuple src, AddressMode, Register);
> /*! bra labelIndex */
> Instruction BRA(LabelIndex labelIndex);
> /*! (pred) bra labelIndex */
> @@ -740,10 +736,12 @@ namespace ir {
> Instruction WHILE(LabelIndex labelIndex, Register pred);
> /*! ret */
> Instruction RET(void);
> - /*! load.type.space {dst1,...,dst_valueNum} offset value */
> - Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace
> space, uint32_t valueNum, bool dwAligned, bool fixedBTI, Register bti);
> - /*! store.type.space offset {src1,...,src_valueNum} value */
> - Instruction STORE(Type type, Tuple src, Register offset, AddressSpace
> space, uint32_t valueNum, bool dwAligned, bool fixedBTI, Register bti);
> + /*! load.type.space {dst1,...,dst_valueNum} offset value, {bti} */
> + Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace
> space, uint32_t valueNum, bool dwAligned, AddressMode, unsigned
> SurfaceIndex);
> + Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace
> space, uint32_t valueNum, bool dwAligned, AddressMode, Register bti);
> + /*! store.type.space offset {src1,...,src_valueNum} value {bti}*/
> + Instruction STORE(Type type, Tuple src, Register offset, AddressSpace
> space, uint32_t valueNum, bool dwAligned, AddressMode, unsigned
> SurfaceIndex);
> + Instruction STORE(Type type, Tuple src, Register offset, AddressSpace
> space, uint32_t valueNum, bool dwAligned, AddressMode, Register bti);
> /*! loadi.type dst value */
> Instruction LOADI(Type type, Register dst, ImmediateIndex value);
> /*! sync.params... (see Sync instruction) */
> diff --git a/backend/src/ir/lowering.cpp b/backend/src/ir/lowering.cpp
> index 9fcdf74..66ced8c 100644
> --- a/backend/src/ir/lowering.cpp
> +++ b/backend/src/ir/lowering.cpp
> @@ -320,7 +320,7 @@ namespace ir {
> continue;
>
> IndirectLoad indirectLoad;
> - Register addr = load->getAddress();
> + Register addr = load->getAddressRegister();
> indirectLoad.argID = argID;
> indirectLoad.load = insn;
>
> @@ -368,7 +368,7 @@ namespace ir {
>
> const Register reg = load->getValue(valueID);
>
> - Instruction mov = ir::INDIRECT_MOV(type, reg, arg, load->getAddress(),
> offset);
> + Instruction mov = ir::INDIRECT_MOV(type, reg, arg, load-
> >getAddressRegister(), offset);
> mov.insert(ins_after, &ins_after);
> replaced = true;
> }
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index 7299d53..39665b8 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -386,6 +386,36 @@ namespace gbe
> ir::Context &ctx;
> };
>
> + class GenWriter;
> + class MemoryInstHelper {
> + public:
> + MemoryInstHelper(ir::Context &c, ir::Unit &u, GenWriter *w, bool l)
> + : ctx(c),
> + unit(u),
> + writer(w),
> + legacyMode(l)
> + { }
> + void emitUnalignedDQLoadStore(Value *llvmValues);
> + ir::Tuple getValueTuple(llvm::Value *llvmValues, llvm::Type *elemType,
> unsigned start, unsigned elemNum);
> + void emitBatchLoadOrStore(const ir::Type type, const uint32_t
> elemNum, Value *llvmValues, Type * elemType);
> + ir::Register getOffsetAddress(ir::Register basePtr, unsigned offset);
> + void shootMessage(ir::Type type, ir::Register offset, ir::Tuple value,
> unsigned elemNum);
> + template <bool isLoad, typename T>
> + void emitLoadOrStore(T &I);
> + private:
> + ir::Context &ctx;
> + ir::Unit &unit;
> + GenWriter *writer;
> + bool legacyMode;
> + ir::AddressSpace addrSpace;
> + ir::Register mBTI;
> + ir::Register mPtr;
> + ir::AddressMode mAddressMode;
> + unsigned SurfaceIndex;
> + bool isLoad;
> + bool dwAligned;
> + };
> +
> /*! Translate LLVM IR code to Gen IR code */
> class GenWriter : public FunctionPass, public InstVisitor<GenWriter>
> {
> @@ -437,6 +467,9 @@ namespace gbe
> Function *Func;
> const Module *TheModule;
> int btiBase;
> + /*! legacyMode is for hardware before BDW,
> +     * which does not support stateless memory access */
> + bool legacyMode;
> public:
> static char ID;
> explicit GenWriter(ir::Unit &unit)
> @@ -446,7 +479,8 @@ namespace gbe
> regTranslator(ctx),
> LI(0),
> TheModule(0),
> - btiBase(BTI_RESERVED_NUM)
> + btiBase(BTI_RESERVED_NUM),
> + legacyMode(true)
> {
> #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7
> initializeLoopInfoWrapperPassPass(*PassRegistry::getPassRegistry());
> @@ -491,7 +525,8 @@ namespace gbe
>
> Func = &F;
> assignBti(F);
> - analyzePointerOrigin(F);
> + if (legacyMode)
> + analyzePointerOrigin(F);
>
> #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7
> LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
> @@ -643,6 +678,7 @@ namespace gbe
> ir::ImmediateIndex processSeqConstant(ConstantDataSequential *seq,
> int index, ConstTypeId tid);
> ir::ImmediateIndex processConstantVector(ConstantVector *cv, int
> index);
> + friend class MemoryInstHelper;
> };
>
> char GenWriter::ID = 0;
> @@ -3570,47 +3606,55 @@ namespace gbe
> CallSite::arg_iterator AI = CS.arg_begin();
> CallSite::arg_iterator AE = CS.arg_end();
> GBE_ASSERT(AI != AE);
> -
> - ir::AddressSpace addrSpace;
> -
> Value *llvmPtr = *AI;
> - Value *bti = getBtiRegister(llvmPtr);
> - Value *ptrBase = getPointerBase(llvmPtr);
> + ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmPtr-
> >getType()->getPointerAddressSpace());
> ir::Register pointer = this->getRegister(llvmPtr);
> - ir::Register baseReg = this->getRegister(ptrBase);
>
> + ir::Register ptr;
> ir::Register btiReg;
> - bool fixedBTI = false;
> - if (isa<ConstantInt>(bti)) {
> - fixedBTI = true;
> - unsigned index = cast<ConstantInt>(bti)->getZExtValue();
> - addrSpace = btiToGen(index);
> - ir::ImmediateIndex immIndex = ctx.newImmediate((uint32_t)index);
> - btiReg = ctx.reg(ir::FAMILY_DWORD);
> - ctx.LOADI(ir::TYPE_U32, btiReg, immIndex);
> +      unsigned SurfaceIndex = 0xff;
> +
> + ir::AddressMode AM;
> + if (legacyMode) {
> + Value *bti = getBtiRegister(llvmPtr);
> + Value *ptrBase = getPointerBase(llvmPtr);
> + ir::Register baseReg = this->getRegister(ptrBase);
> + if (isa<ConstantInt>(bti)) {
> + AM = ir::AM_StaticBti;
> + SurfaceIndex = cast<ConstantInt>(bti)->getZExtValue();
> + addrSpace = btiToGen(SurfaceIndex);
> + } else {
> + AM = ir::AM_DynamicBti;
> + addrSpace = ir::MEM_MIXED;
> + btiReg = this->getRegister(bti);
> + }
> + const ir::RegisterFamily pointerFamily = ctx.getPointerFamily();
> + ptr = ctx.reg(pointerFamily);
> + ctx.SUB(ir::TYPE_U32, ptr, pointer, baseReg);
> } else {
> - addrSpace = ir::MEM_MIXED;
> - btiReg = this->getRegister(bti);
> + AM = ir::AM_Stateless;
> + ptr = pointer;
> }
>
> - const ir::RegisterFamily pointerFamily = ctx.getPointerFamily();
> - const ir::Register ptr = ctx.reg(pointerFamily);
> - ctx.SUB(ir::TYPE_U32, ptr, pointer, baseReg);
> -
> const ir::Register dst = this->getRegister(&I);
>
> - uint32_t srcNum = 0;
> - vector<ir::Register> src;
> - src.push_back(ptr);
> - srcNum++;
> + uint32_t payloadNum = 0;
> + vector<ir::Register> payload;
> AI++;
>
> while(AI != AE) {
> - src.push_back(this->getRegister(*(AI++)));
> - srcNum++;
> + payload.push_back(this->getRegister(*(AI++)));
> + payloadNum++;
> + }
> + ir::Type type = getType(ctx, llvmPtr->getType()-
> >getPointerElementType());
> + const ir::Tuple payloadTuple = payloadNum == 0 ?
> + ir::Tuple(0) :
> + ctx.arrayTuple(&payload[0], payloadNum);
> + if (AM == ir::AM_DynamicBti) {
> + ctx.ATOMIC(opcode, type, dst, addrSpace, ptr, payloadTuple, AM,
> btiReg);
> + } else {
> + ctx.ATOMIC(opcode, type, dst, addrSpace, ptr, payloadTuple, AM,
> SurfaceIndex);
> }
> - const ir::Tuple srcTuple = ctx.arrayTuple(&src[0], srcNum);
> - ctx.ATOMIC(opcode, dst, addrSpace, btiReg, fixedBTI, srcTuple);
> }
>
> /* append a new sampler. should be called before any reference to
> @@ -4323,65 +4367,82 @@ namespace gbe
> this->newRegister(&I);
> }
> void GenWriter::regAllocateStoreInst(StoreInst &I) {}
> + void GenWriter::emitLoadInst(LoadInst &I) {
> + MemoryInstHelper *h = new MemoryInstHelper(ctx, unit, this,
> legacyMode);
> + h->emitLoadOrStore<true>(I);
> + delete h;
> + }
>
> - void GenWriter::emitBatchLoadOrStore(const ir::Type type, const uint32_t
> elemNum,
> - Value *llvmValues, const ir::Register ptr,
> - const ir::AddressSpace addrSpace,
> - Type * elemType, bool isLoad, ir::Register bti,
> - bool dwAligned, bool fixedBTI) {
> - const ir::RegisterFamily pointerFamily = ctx.getPointerFamily();
> - uint32_t totalSize = elemNum * getFamilySize(getFamily(type));
> - uint32_t msgNum = totalSize > 16 ? totalSize / 16 : 1;
> - const uint32_t perMsgNum = elemNum / msgNum;
> + void GenWriter::emitStoreInst(StoreInst &I) {
> + MemoryInstHelper *h = new MemoryInstHelper(ctx, unit, this,
> legacyMode);
> + h->emitLoadOrStore<false>(I);
> + delete h;
> + }
>
> - for (uint32_t msg = 0; msg < msgNum; ++msg) {
> - // Build the tuple data in the vector
> + llvm::FunctionPass *createGenPass(ir::Unit &unit) {
> + return new GenWriter(unit);
> + }
> +
> + ir::Tuple MemoryInstHelper::getValueTuple(llvm::Value *llvmValues,
> llvm::Type *elemType, unsigned start, unsigned elemNum) {
> vector<ir::Register> tupleData; // put registers here
> - for (uint32_t elemID = 0; elemID < perMsgNum; ++elemID) {
> + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> ir::Register reg;
> - if(regTranslator.isUndefConst(llvmValues, elemID)) {
> + if(writer->regTranslator.isUndefConst(llvmValues, elemID)) {
> Value *v = Constant::getNullValue(elemType);
> - reg = this->getRegister(v);
> + reg = writer->getRegister(v);
> } else
> - reg = this->getRegister(llvmValues, perMsgNum*msg+elemID);
> + reg = writer->getRegister(llvmValues, start + elemID);
>
> tupleData.push_back(reg);
> }
> - const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], perMsgNum);
> -
> - // We may need to update to offset the pointer
> - ir::Register addr;
> - if (msg == 0)
> - addr = ptr;
> - else {
> - const ir::Register offset = ctx.reg(pointerFamily);
> - ir::ImmediateIndex immIndex;
> - ir::Type immType;
> + const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], elemNum);
> + return tuple;
> + }
> +
> + void MemoryInstHelper::emitBatchLoadOrStore(const ir::Type type, const
> uint32_t elemNum,
> + Value *llvmValues,
> + Type * elemType) {
> + uint32_t totalSize = elemNum * getFamilySize(getFamily(type));
> + uint32_t msgNum = totalSize > 16 ? totalSize / 16 : 1;
> + const uint32_t perMsgNum = elemNum / msgNum;
> +
> + for (uint32_t msg = 0; msg < msgNum; ++msg) {
> + // Build the tuple data in the vector
> + ir::Tuple tuple = getValueTuple(llvmValues, elemType, perMsgNum*msg,
> perMsgNum);
> // each message can read/write 16 byte
> const int32_t stride = 16;
> - if (pointerFamily == ir::FAMILY_DWORD) {
> - immIndex = ctx.newImmediate(int32_t(msg*stride));
> - immType = ir::TYPE_S32;
> - } else {
> - immIndex = ctx.newImmediate(int64_t(msg*stride));
> - immType = ir::TYPE_S64;
> - }
> + ir::Register addr = getOffsetAddress(mPtr, msg*stride);
> + shootMessage(type, addr, tuple, perMsgNum);
> + }
> + }
> +
> + ir::Register MemoryInstHelper::getOffsetAddress(ir::Register basePtr,
> unsigned offset) {
> + const ir::RegisterFamily pointerFamily = ctx.getPointerFamily();
> + ir::Register addr;
> + if (offset == 0)
> + addr = basePtr;
> + else {
> + const ir::Register offsetReg = ctx.reg(pointerFamily);
> + ir::ImmediateIndex immIndex;
> + ir::Type immType;
>
> - addr = ctx.reg(pointerFamily);
> - ctx.LOADI(immType, offset, immIndex);
> - ctx.ADD(immType, addr, ptr, offset);
> + if (pointerFamily == ir::FAMILY_DWORD) {
> + immIndex = ctx.newImmediate(int32_t(offset));
> + immType = ir::TYPE_S32;
> + } else {
> + immIndex = ctx.newImmediate(int64_t(offset));
> + immType = ir::TYPE_S64;
> }
>
> - // Emit the instruction
> - if (isLoad)
> - ctx.LOAD(type, tuple, addr, addrSpace, perMsgNum, dwAligned,
> fixedBTI, bti);
> - else
> - ctx.STORE(type, tuple, addr, addrSpace, perMsgNum, dwAligned,
> fixedBTI, bti);
> + addr = ctx.reg(pointerFamily);
> + ctx.LOADI(immType, offsetReg, immIndex);
> + ctx.ADD(immType, addr, basePtr, offsetReg);
> }
> + return addr;
> }
>
> // handle load of dword/qword with unaligned address
> - void GenWriter::emitUnalignedDQLoadStore(ir::Register ptr, Value
> *llvmValues, ir::AddressSpace addrSpace, ir::Register bti, bool isLoad, bool
> dwAligned, bool fixedBTI)
> + void MemoryInstHelper::emitUnalignedDQLoadStore(Value *llvmValues)
> {
> Type *llvmType = llvmValues->getType();
> unsigned byteSize = getTypeByteSize(unit, llvmType);
> @@ -4395,19 +4456,7 @@ namespace gbe
> }
> const ir::Type type = getType(ctx, elemType);
>
> - vector<ir::Register> tupleData;
> - for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> - ir::Register reg;
> - if(regTranslator.isUndefConst(llvmValues, elemID)) {
> - Value *v = Constant::getNullValue(elemType);
> - reg = this->getRegister(v);
> - } else
> - reg = this->getRegister(llvmValues, elemID);
> -
> - tupleData.push_back(reg);
> - }
> - const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], elemNum);
> -
> + ir::Tuple tuple = getValueTuple(llvmValues, elemType, 0, elemNum);
> vector<ir::Register> byteTupleData;
> for (uint32_t elemID = 0; elemID < byteSize; ++elemID) {
> byteTupleData.push_back(ctx.reg(ir::FAMILY_BYTE));
> @@ -4415,97 +4464,83 @@ namespace gbe
> const ir::Tuple byteTuple = ctx.arrayTuple(&byteTupleData[0], byteSize);
>
> if (isLoad) {
> - ctx.LOAD(ir::TYPE_U8, byteTuple, ptr, addrSpace, byteSize, dwAligned,
> fixedBTI, bti);
> + shootMessage(ir::TYPE_U8, mPtr, byteTuple, byteSize);
> ctx.BITCAST(type, ir::TYPE_U8, tuple, byteTuple, elemNum, byteSize);
> } else {
> ctx.BITCAST(ir::TYPE_U8, type, byteTuple, tuple, byteSize, elemNum);
> // FIXME: byte scatter does not handle correctly vector store, after fix
> that,
> // we can directly use on store instruction like:
> // ctx.STORE(ir::TYPE_U8, byteTuple, ptr, addrSpace, byteSize,
> dwAligned, fixedBTI, bti);
> - const ir::RegisterFamily pointerFamily = ctx.getPointerFamily();
> for (uint32_t elemID = 0; elemID < byteSize; elemID++) {
> - const ir::Register reg = byteTupleData[elemID];
> - ir::Register addr;
> - if (elemID == 0)
> - addr = ptr;
> - else {
> - const ir::Register offset = ctx.reg(pointerFamily);
> - ir::ImmediateIndex immIndex;
> - immIndex = ctx.newImmediate(int32_t(elemID));
> - addr = ctx.reg(pointerFamily);
> - ctx.LOADI(ir::TYPE_S32, offset, immIndex);
> - ctx.ADD(ir::TYPE_S32, addr, ptr, offset);
> - }
> - ctx.STORE(ir::TYPE_U8, addr, addrSpace, dwAligned, fixedBTI, bti, reg);
> + const ir::Register addr = getOffsetAddress(mPtr, elemID);
> + const ir::Tuple value = ctx.arrayTuple(&byteTupleData[elemID], 1);
> + shootMessage(ir::TYPE_U8, addr, value, 1);
> }
> }
> }
>
> - extern int OCL_SIMD_WIDTH;
> template <bool isLoad, typename T>
> - INLINE void GenWriter::emitLoadOrStore(T &I)
> - {
> + void MemoryInstHelper::emitLoadOrStore(T &I) {
> Value *llvmPtr = I.getPointerOperand();
> Value *llvmValues = getLoadOrStoreValue(I);
> Type *llvmType = llvmValues->getType();
> - const bool dwAligned = (I.getAlignment() % 4) == 0;
> - ir::AddressSpace addrSpace;
> - const ir::Register pointer = this->getRegister(llvmPtr);
> + dwAligned = (I.getAlignment() % 4) == 0;
> + addrSpace = addressSpaceLLVMToGen(llvmPtr->getType()-
> >getPointerAddressSpace());
> + const ir::Register pointer = writer->getRegister(llvmPtr);
> const ir::RegisterFamily pointerFamily = ctx.getPointerFamily();
>
> - Value *bti = getBtiRegister(llvmPtr);
> - Value *ptrBase = getPointerBase(llvmPtr);
> - ir::Register baseReg = this->getRegister(ptrBase);
> - bool zeroBase = false;
> - if (isa<ConstantPointerNull>(ptrBase)) {
> - zeroBase = true;
> - }
> -
> - ir::Register btiReg;
> - bool fixedBTI = false;
> - if (isa<ConstantInt>(bti)) {
> - fixedBTI = true;
> - unsigned index = cast<ConstantInt>(bti)->getZExtValue();
> - addrSpace = btiToGen(index);
> - ir::ImmediateIndex immIndex = ctx.newImmediate((uint32_t)index);
> - btiReg = ctx.reg(ir::FAMILY_DWORD);
> - ctx.LOADI(ir::TYPE_U32, btiReg, immIndex);
> - } else {
> - addrSpace = ir::MEM_MIXED;
> - btiReg = this->getRegister(bti);
> - }
> -
> + this->isLoad = isLoad;
> Type *scalarType = llvmType;
> if (!isScalarType(llvmType)) {
> VectorType *vectorType = cast<VectorType>(llvmType);
> scalarType = vectorType->getElementType();
> }
>
> - ir::Register ptr = ctx.reg(pointerFamily);
> - // FIXME: avoid subtraction zero at this stage is not a good idea,
> - // but later ArgumentLower pass need to match exact load/addImm
> pattern
> - // so, I avoid subtracting zero base to satisfy ArgumentLower pass.
> - if (!zeroBase)
> - ctx.SUB(ir::TYPE_U32, ptr, pointer, baseReg);
> - else
> - ptr = pointer;
> + // calculate bti and pointer operand
> + if (legacyMode) {
> + Value *bti = writer->getBtiRegister(llvmPtr);
> + Value *ptrBase = writer->getPointerBase(llvmPtr);
> + ir::Register baseReg = writer->getRegister(ptrBase);
> + bool zeroBase = isa<ConstantPointerNull>(ptrBase) ? true : false;
> +
> + if (isa<ConstantInt>(bti)) {
> + SurfaceIndex = cast<ConstantInt>(bti)->getZExtValue();
> + addrSpace = btiToGen(SurfaceIndex);
> + mAddressMode = ir::AM_StaticBti;
> + } else {
> + addrSpace = ir::MEM_MIXED;
> + mBTI = writer->getRegister(bti);
> + mAddressMode = ir::AM_DynamicBti;
> + }
> + mPtr = ctx.reg(pointerFamily);
> +
> + // FIXME: avoid subtraction zero at this stage is not a good idea,
> + // but later ArgumentLower pass need to match exact load/addImm
> pattern
> + // so, I avoid subtracting zero base to satisfy ArgumentLower pass.
> + if (!zeroBase)
> + ctx.SUB(ir::TYPE_U32, mPtr, pointer, baseReg);
> + else
> + mPtr = pointer;
> + } else {
> + mPtr = pointer;
> + SurfaceIndex = 0xff;
> + mAddressMode = ir::AM_Stateless;
> + }
>
> unsigned primitiveBits = scalarType->getPrimitiveSizeInBits();
> if (!dwAligned
> && (primitiveBits == 64
> || primitiveBits == 32)
> ) {
> - emitUnalignedDQLoadStore(ptr, llvmValues, addrSpace, btiReg, isLoad,
> dwAligned, fixedBTI);
> + emitUnalignedDQLoadStore(llvmValues);
> return;
> }
>      // Scalar is easy. We need not build register tuples
> if (isScalarType(llvmType) == true) {
> const ir::Type type = getType(ctx, llvmType);
> - const ir::Register values = this->getRegister(llvmValues);
> - if (isLoad)
> - ctx.LOAD(type, ptr, addrSpace, dwAligned, fixedBTI, btiReg, values);
> - else
> - ctx.STORE(type, ptr, addrSpace, dwAligned, fixedBTI, btiReg, values);
> + const ir::Register values = writer->getRegister(llvmValues);
> + const ir::Tuple tuple = ctx.arrayTuple(&values, 1);
> + shootMessage(type, mPtr, tuple, 1);
> }
> // A vector type requires to build a tuple
> else {
> @@ -4521,7 +4556,7 @@ namespace gbe
> // And the llvm does cast a type3 data to type4 for load/store instruction,
> // so a 4 elements vector may only have 3 valid elements. We need to fix
> it to correct element
> // count here.
> - if (elemNum == 4 && regTranslator.isUndefConst(llvmValues, 3))
> + if (elemNum == 4 && writer->regTranslator.isUndefConst(llvmValues, 3))
> elemNum = 3;
>
> // The code is going to be fairly different from types to types (based on
> @@ -4532,72 +4567,44 @@ namespace gbe
> if(dataFamily == ir::FAMILY_DWORD && addrSpace !=
> ir::MEM_CONSTANT) {
> // One message is enough here. Nothing special to do
> if (elemNum <= 4) {
> - // Build the tuple data in the vector
> - vector<ir::Register> tupleData; // put registers here
> - for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> - ir::Register reg;
> - if(regTranslator.isUndefConst(llvmValues, elemID)) {
> - Value *v = Constant::getNullValue(elemType);
> - reg = this->getRegister(v);
> - } else
> - reg = this->getRegister(llvmValues, elemID);
> -
> - tupleData.push_back(reg);
> - }
> - const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], elemNum);
> -
> - // Emit the instruction
> - if (isLoad)
> - ctx.LOAD(type, tuple, ptr, addrSpace, elemNum, dwAligned, fixedBTI,
> btiReg);
> - else
> - ctx.STORE(type, tuple, ptr, addrSpace, elemNum, dwAligned, fixedBTI,
> btiReg);
> + ir::Tuple tuple = getValueTuple(llvmValues, elemType, 0, elemNum);
> + shootMessage(type, mPtr, tuple, elemNum);
> }
> - // Not supported by the hardware. So, we split the message and we use
> - // strided loads and stores
> else {
> - emitBatchLoadOrStore(type, elemNum, llvmValues, ptr, addrSpace,
> elemType, isLoad, btiReg, dwAligned, fixedBTI);
> + emitBatchLoadOrStore(type, elemNum, llvmValues, elemType);
> }
> }
> else if((dataFamily == ir::FAMILY_WORD && (isLoad || elemNum % 2 ==
> 0)) ||
> (dataFamily == ir::FAMILY_BYTE && (isLoad || elemNum % 4 == 0))) {
> - emitBatchLoadOrStore(type, elemNum, llvmValues, ptr, addrSpace,
> elemType, isLoad, btiReg, dwAligned, fixedBTI);
> + emitBatchLoadOrStore(type, elemNum, llvmValues, elemType);
> } else {
> for (uint32_t elemID = 0; elemID < elemNum; elemID++) {
> - if(regTranslator.isUndefConst(llvmValues, elemID))
> + if(writer->regTranslator.isUndefConst(llvmValues, elemID))
> continue;
>
> - const ir::Register reg = this->getRegister(llvmValues, elemID);
> - ir::Register addr;
> - if (elemID == 0)
> - addr = ptr;
> - else {
> - const ir::Register offset = ctx.reg(pointerFamily);
> - ir::ImmediateIndex immIndex;
> - int elemSize = getTypeByteSize(unit, elemType);
> - immIndex = ctx.newImmediate(int32_t(elemID * elemSize));
> - addr = ctx.reg(pointerFamily);
> - ctx.LOADI(ir::TYPE_S32, offset, immIndex);
> - ctx.ADD(ir::TYPE_S32, addr, ptr, offset);
> - }
> - if (isLoad)
> - ctx.LOAD(type, addr, addrSpace, dwAligned, fixedBTI, btiReg, reg);
> - else
> - ctx.STORE(type, addr, addrSpace, dwAligned, fixedBTI, btiReg, reg);
> + const ir::Register reg = writer->getRegister(llvmValues, elemID);
> + int elemSize = getTypeByteSize(unit, elemType);
> +
> + ir::Register addr = getOffsetAddress(mPtr, elemID*elemSize);
> +          const ir::Tuple tuple = ctx.arrayTuple(&reg, 1);
> + shootMessage(type, addr, tuple, 1);
> }
> }
> }
> }
>
> - void GenWriter::emitLoadInst(LoadInst &I) {
> - this->emitLoadOrStore<true>(I);
> - }
> -
> - void GenWriter::emitStoreInst(StoreInst &I) {
> - this->emitLoadOrStore<false>(I);
> - }
> -
> - llvm::FunctionPass *createGenPass(ir::Unit &unit) {
> - return new GenWriter(unit);
> + void MemoryInstHelper::shootMessage(ir::Type type, ir::Register offset,
> ir::Tuple value, unsigned elemNum) {
> + if (mAddressMode == ir::AM_DynamicBti) {
> + if (isLoad)
> + ctx.LOAD(type, value, offset, addrSpace, elemNum, dwAligned,
> mAddressMode, mBTI);
> + else
> + ctx.STORE(type, value, offset, addrSpace, elemNum, dwAligned,
> mAddressMode, mBTI);
> + } else {
> + if (isLoad)
> + ctx.LOAD(type, value, offset, addrSpace, elemNum, dwAligned,
> mAddressMode, SurfaceIndex);
> + else
> + ctx.STORE(type, value, offset, addrSpace, elemNum, dwAligned,
> mAddressMode, SurfaceIndex);
> + }
> }
> } /* namespace gbe */
>
> --
> 2.3.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list