[Beignet] [PATCH 3/3] GBE: Optimize byte/short load/store using untyped read/write
Ruiling Song
ruiling.song at intel.com
Thu Mar 6 00:14:21 PST 2014
Scatter/gather messages are much slower than untyped read/write. So if we can pack
the load/store of char/short to use an untyped message, just do it.
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/backend/gen_context.cpp | 29 +++++
backend/src/backend/gen_context.hpp | 2 +
.../src/backend/gen_insn_gen7_schedule_info.hxx | 2 +
backend/src/backend/gen_insn_selection.cpp | 120 +++++++++++++++-----
backend/src/backend/gen_insn_selection.hxx | 2 +
backend/src/backend/gen_register.hpp | 22 ++++
backend/src/ir/instruction.hpp | 3 +-
backend/src/llvm/llvm_gen_backend.cpp | 112 ++++++++++--------
8 files changed, 214 insertions(+), 78 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 7104e81..9689ac5 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1776,6 +1776,35 @@ namespace gbe
p->BYTE_SCATTER(src, bti, elemSize);
}
+ void GenContext::emitUnpackByteInstruction(const SelectionInstruction &insn) {
+ const GenRegister src = ra->genReg(insn.src(0));
+ for(uint32_t i = 0; i < insn.dstNum; i++) {
+ p->MOV(ra->genReg(insn.dst(i)), GenRegister::splitReg(src, insn.dstNum, i));
+ }
+ }
+
+ void GenContext::emitPackByteInstruction(const SelectionInstruction &insn) {
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ p->push();
+ if(simdWidth == 8) {
+ for(uint32_t i = 0; i < insn.srcNum; i++)
+ p->MOV(GenRegister::splitReg(dst, insn.srcNum, i), ra->genReg(insn.src(i)));
+ } else {
+ // when destination expands two registers, the source must span two registers.
+ p->curr.execWidth = 8;
+ for(uint32_t i = 0; i < insn.srcNum; i++) {
+ GenRegister dsti = GenRegister::splitReg(dst, insn.srcNum, i);
+ GenRegister src = ra->genReg(insn.src(i));
+
+ p->curr.quarterControl = 0;
+ p->MOV(dsti, src);
+ p->curr.quarterControl = 1;
+ p->MOV(GenRegister::Qn(dsti,1), GenRegister::Qn(src, 1));
+ }
+ }
+ p->pop();
+ }
+
void GenContext::emitDWordGatherInstruction(const SelectionInstruction &insn) {
const GenRegister dst = ra->genReg(insn.dst(0));
const GenRegister src = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 6cfc295..a853731 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -142,6 +142,8 @@ namespace gbe
void emitAtomicInstruction(const SelectionInstruction &insn);
void emitByteGatherInstruction(const SelectionInstruction &insn);
void emitByteScatterInstruction(const SelectionInstruction &insn);
+ void emitPackByteInstruction(const SelectionInstruction &insn);
+ void emitUnpackByteInstruction(const SelectionInstruction &insn);
void emitDWordGatherInstruction(const SelectionInstruction &insn);
void emitSampleInstruction(const SelectionInstruction &insn);
void emitTypedWriteInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 13cbd41..9eb04de 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -30,6 +30,8 @@ DECL_GEN7_SCHEDULE(UntypedWrite, 80, 1, 1)
DECL_GEN7_SCHEDULE(ByteGather, 80, 1, 1)
DECL_GEN7_SCHEDULE(ByteScatter, 80, 1, 1)
DECL_GEN7_SCHEDULE(DWordGather, 80, 1, 1)
+DECL_GEN7_SCHEDULE(PackByte, 20, 1, 1)
+DECL_GEN7_SCHEDULE(UnpackByte, 20, 1, 1)
DECL_GEN7_SCHEDULE(Sample, 80, 1, 1)
DECL_GEN7_SCHEDULE(TypedWrite, 80, 1, 1)
DECL_GEN7_SCHEDULE(SpillReg, 80, 1, 1)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 46f0123..48b3069 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -529,6 +529,10 @@ namespace gbe
void BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, uint32_t bti);
/*! DWord scatter (for constant cache read) */
void DWORD_GATHER(Reg dst, Reg addr, uint32_t bti);
+ /*! Unpack the uint to char4 */
+ void UNPACK_BYTE(const GenRegister *dst, const GenRegister src, uint32_t elemNum);
+ /*! pack the char4 to uint */
+ void PACK_BYTE(const GenRegister dst, const GenRegister *src, uint32_t elemNum);
/*! Extended math function (2 arguments) */
void MATH(Reg dst, uint32_t function, Reg src0, Reg src1);
/*! Extended math function (1 argument) */
@@ -1114,6 +1118,18 @@ namespace gbe
insn->dst(0) = dst;
insn->extra.function = bti;
}
+ void Selection::Opaque::UNPACK_BYTE(const GenRegister *dst, const GenRegister src, uint32_t elemNum) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_UNPACK_BYTE, elemNum, 1);
+ insn->src(0) = src;
+ for(uint32_t i = 0; i < elemNum; i++)
+ insn->dst(i) = dst[i];
+ }
+ void Selection::Opaque::PACK_BYTE(const GenRegister dst, const GenRegister *src, uint32_t elemNum) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_PACK_BYTE, 1, elemNum);
+ for(uint32_t i = 0; i < elemNum; i++)
+ insn->src(i) = src[i];
+ insn->dst(0) = dst;
+ }
void Selection::Opaque::MATH(Reg dst, uint32_t function, Reg src0, Reg src1) {
SelectionInstruction *insn = this->appendInsn(SEL_OP_MATH, 1, 2);
@@ -2415,26 +2431,50 @@ namespace gbe
const ir::LoadInstruction &insn,
const uint32_t elemSize,
GenRegister address,
- GenRegister value,
uint32_t bti) const
{
using namespace ir;
- GBE_ASSERT(insn.getValueNum() == 1);
+ const uint32_t valueNum = insn.getValueNum();
const uint32_t simdWidth = sel.ctx.getSimdWidth();
+ if(valueNum > 1) {
+ vector<GenRegister> dst(valueNum);
+ const uint32_t typeSize = getFamilySize(getFamily(insn.getValueType()));
+
+ if(elemSize == GEN_BYTE_SCATTER_WORD) {
+ for(uint32_t i = 0; i < valueNum; i++)
+ dst[i] = sel.selReg(insn.getValue(i), ir::TYPE_U16);
+ } else if(elemSize == GEN_BYTE_SCATTER_BYTE) {
+ for(uint32_t i = 0; i < valueNum; i++)
+ dst[i] = sel.selReg(insn.getValue(i), ir::TYPE_U8);
+ }
- // We need a temporary register if we read bytes or words
- Register dst = Register(value.value.reg);
- if (elemSize == GEN_BYTE_SCATTER_WORD ||
- elemSize == GEN_BYTE_SCATTER_BYTE) {
- dst = sel.reg(FAMILY_DWORD);
- sel.BYTE_GATHER(GenRegister::fxgrf(simdWidth, dst), address, elemSize, bti);
- }
+ uint32_t tmpRegNum = typeSize*valueNum / 4;
+ vector<GenRegister> tmp(tmpRegNum);
+ for(uint32_t i = 0; i < tmpRegNum; i++) {
+ tmp[i] = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
+ }
- // Repack bytes or words using a converting mov instruction
- if (elemSize == GEN_BYTE_SCATTER_WORD)
- sel.MOV(GenRegister::retype(value, GEN_TYPE_UW), GenRegister::unpacked_uw(dst));
- else if (elemSize == GEN_BYTE_SCATTER_BYTE)
- sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(dst));
+ sel.UNTYPED_READ(address, tmp.data(), tmpRegNum, bti);
+ for(uint32_t i = 0; i < tmpRegNum; i++) {
+ sel.UNPACK_BYTE(dst.data() + i * 4/typeSize, tmp[i], 4/typeSize);
+ }
+ } else {
+ GBE_ASSERT(insn.getValueNum() == 1);
+ const GenRegister value = sel.selReg(insn.getValue(0));
+ // We need a temporary register if we read bytes or words
+ Register dst = Register(value.value.reg);
+ if (elemSize == GEN_BYTE_SCATTER_WORD ||
+ elemSize == GEN_BYTE_SCATTER_BYTE) {
+ dst = sel.reg(FAMILY_DWORD);
+ sel.BYTE_GATHER(GenRegister::fxgrf(simdWidth, dst), address, elemSize, bti);
+ }
+
+ // Repack bytes or words using a converting mov instruction
+ if (elemSize == GEN_BYTE_SCATTER_WORD)
+ sel.MOV(GenRegister::retype(value, GEN_TYPE_UW), GenRegister::unpacked_uw(dst));
+ else if (elemSize == GEN_BYTE_SCATTER_BYTE)
+ sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(dst));
+ }
}
void emitIndirectMove(Selection::Opaque &sel,
@@ -2469,8 +2509,7 @@ namespace gbe
else if(insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
this->emitDWordGather(sel, insn, address, 0x2);
else {
- const GenRegister value = sel.selReg(insn.getValue(0));
- this->emitByteGather(sel, insn, elemSize, address, value, 0x2);
+ this->emitByteGather(sel, insn, elemSize, address, 0x2);
}
}
else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
@@ -2478,8 +2517,7 @@ namespace gbe
else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
this->emitUntypedRead(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00);
else {
- const GenRegister value = sel.selReg(insn.getValue(0));
- this->emitByteGather(sel, insn, elemSize, address, value, space == MEM_LOCAL ? 0xfe : 0x01);
+ this->emitByteGather(sel, insn, elemSize, address, space == MEM_LOCAL ? 0xfe : 0x01);
}
return true;
}
@@ -2535,22 +2573,43 @@ namespace gbe
const ir::StoreInstruction &insn,
const uint32_t elemSize,
GenRegister addr,
- GenRegister value,
uint32_t bti) const
{
using namespace ir;
const uint32_t simdWidth = sel.ctx.getSimdWidth();
- const GenRegister dst = value;
-
- GBE_ASSERT(insn.getValueNum() == 1);
- if (elemSize == GEN_BYTE_SCATTER_WORD) {
- value = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
- sel.MOV(value, GenRegister::retype(dst, GEN_TYPE_UW));
- } else if (elemSize == GEN_BYTE_SCATTER_BYTE) {
- value = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
- sel.MOV(value, GenRegister::retype(dst, GEN_TYPE_UB));
+ uint32_t valueNum = insn.getValueNum();
+
+ if(valueNum > 1) {
+ const uint32_t typeSize = getFamilySize(getFamily(insn.getValueType()));
+ vector<GenRegister> value(valueNum);
+
+ if(elemSize == GEN_BYTE_SCATTER_WORD) {
+ for(uint32_t i = 0; i < valueNum; i++)
+ value[i] = sel.selReg(insn.getValue(i), ir::TYPE_U16);
+ } else if(elemSize == GEN_BYTE_SCATTER_BYTE) {
+ for(uint32_t i = 0; i < valueNum; i++)
+ value[i] = sel.selReg(insn.getValue(i), ir::TYPE_U8);
+ }
+
+ uint32_t tmpRegNum = typeSize*valueNum / 4;
+ vector<GenRegister> tmp(tmpRegNum);
+ for(uint32_t i = 0; i < tmpRegNum; i++) {
+ tmp[i] = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
+ sel.PACK_BYTE(tmp[i], value.data() + i * 4/typeSize, 4/typeSize);
+ }
+
+ sel.UNTYPED_WRITE(addr, tmp.data(), tmpRegNum, bti);
+ } else {
+ const GenRegister value = sel.selReg(insn.getValue(0));
+ GBE_ASSERT(insn.getValueNum() == 1);
+ const GenRegister tmp = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
+ if (elemSize == GEN_BYTE_SCATTER_WORD) {
+ sel.MOV(tmp, GenRegister::retype(value, GEN_TYPE_UW));
+ } else if (elemSize == GEN_BYTE_SCATTER_BYTE) {
+ sel.MOV(tmp, GenRegister::retype(value, GEN_TYPE_UB));
+ }
+ sel.BYTE_SCATTER(addr, tmp, elemSize, bti);
}
- sel.BYTE_SCATTER(addr, value, elemSize, bti);
}
INLINE bool emitOne(Selection::Opaque &sel, const ir::StoreInstruction &insn) const
@@ -2566,8 +2625,7 @@ namespace gbe
this->emitUntypedWrite(sel, insn, bti);
else {
const GenRegister address = sel.selReg(insn.getAddress());
- const GenRegister value = sel.selReg(insn.getValue(0));
- this->emitByteScatter(sel, insn, elemSize, address, value, bti);
+ this->emitByteScatter(sel, insn, elemSize, address, bti);
}
return true;
}
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index e44b9d4..564dbc5 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -58,6 +58,8 @@ DECL_SELECTION_IR(WRITE64, Write64Instruction)
DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction)
DECL_SELECTION_IR(BYTE_SCATTER, ByteScatterInstruction)
DECL_SELECTION_IR(DWORD_GATHER, DWordGatherInstruction)
+DECL_SELECTION_IR(PACK_BYTE, PackByteInstruction)
+DECL_SELECTION_IR(UNPACK_BYTE, UnpackByteInstruction)
DECL_SELECTION_IR(SAMPLE, SampleInstruction)
DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
DECL_SELECTION_IR(SPILL_REG, SpillRegInstruction)
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 8794318..1158316 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -255,6 +255,28 @@ namespace gbe
return r;
}
+ // split a DWORD register into unpacked Byte or Short register
+ static INLINE GenRegister splitReg(GenRegister reg, uint32_t count, uint32_t sub_part) {
+ GenRegister r = reg;
+ GBE_ASSERT(count == 4 || count == 2);
+ if(reg.hstride != GEN_HORIZONTAL_STRIDE_0) {
+ r.hstride = count == 4 ? GEN_HORIZONTAL_STRIDE_4 : GEN_HORIZONTAL_STRIDE_2;
+ }
+ if(count == 4) {
+ r.type = reg.type == GEN_TYPE_UD ? GEN_TYPE_UB : GEN_TYPE_B;
+ r.vstride = GEN_VERTICAL_STRIDE_32;
+ } else {
+ r.type = reg.type == GEN_TYPE_UD ? GEN_TYPE_UW : GEN_TYPE_W;
+ r.vstride = GEN_VERTICAL_STRIDE_16;
+ }
+
+ r.subnr += sub_part*typeSize(r.type);
+ r.nr += r.subnr / 32;
+ r.subnr %= 32;
+
+ return r;
+ }
+
INLINE bool isint64(void) const {
if ((type == GEN_TYPE_UL || type == GEN_TYPE_L) && file == GEN_GENERAL_REGISTER_FILE)
return true;
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index f9f5e68..457b5b4 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -178,7 +178,8 @@ namespace ir {
template <typename T> INLINE bool isMemberOf(void) const {
return T::isClassOf(*this);
}
- static const uint32_t MAX_SRC_NUM = 16;
+ /*! max_src for store instruction (vec16 + addr) */
+ static const uint32_t MAX_SRC_NUM = 17;
static const uint32_t MAX_DST_NUM = 16;
protected:
BasicBlock *parent; //!< The basic block containing the instruction
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 4b692e2..dcc1497 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -578,7 +578,10 @@ namespace gbe
void visitInsertValueInst(InsertValueInst &I) {NOT_SUPPORTED;}
void visitExtractValueInst(ExtractValueInst &I) {NOT_SUPPORTED;}
template <bool isLoad, typename T> void visitLoadOrStore(T &I);
-
+ // batch vec4/8/16 load/store
+ INLINE void emitBatchLoadOrStore(const ir::Type type, const uint32_t elemNum,
+ Value *llvmValue, const ir::Register ptr,
+ const ir::AddressSpace addrSpace, Type * elemType, bool isLoad);
void visitInstruction(Instruction &I) {NOT_SUPPORTED;}
};
@@ -2774,6 +2777,61 @@ namespace gbe
}
void GenWriter::regAllocateStoreInst(StoreInst &I) {}
+ void GenWriter::emitBatchLoadOrStore(const ir::Type type, const uint32_t elemNum,
+ Value *llvmValues, const ir::Register ptr,
+ const ir::AddressSpace addrSpace,
+ Type * elemType, bool isLoad) {
+ const ir::RegisterFamily pointerFamily = ctx.getPointerFamily();
+ uint32_t totalSize = elemNum * getFamilySize(getFamily(type));
+ uint32_t msgNum = totalSize > 16 ? totalSize / 16 : 1;
+ const uint32_t perMsgNum = elemNum / msgNum;
+
+ for (uint32_t msg = 0; msg < msgNum; ++msg) {
+ // Build the tuple data in the vector
+ vector<ir::Register> tupleData; // put registers here
+ for (uint32_t elemID = 0; elemID < perMsgNum; ++elemID) {
+ ir::Register reg;
+ if(regTranslator.isUndefConst(llvmValues, elemID)) {
+ Value *v = Constant::getNullValue(elemType);
+ reg = this->getRegister(v);
+ } else
+ reg = this->getRegister(llvmValues, perMsgNum*msg+elemID);
+
+ tupleData.push_back(reg);
+ }
+ const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], perMsgNum);
+
+ // We may need to update to offset the pointer
+ ir::Register addr;
+ if (msg == 0)
+ addr = ptr;
+ else {
+ const ir::Register offset = ctx.reg(pointerFamily);
+ ir::ImmediateIndex immIndex;
+ ir::Type immType;
+ // each message can read/write 16 byte
+ const int32_t stride = 16;
+ if (pointerFamily == ir::FAMILY_DWORD) {
+ immIndex = ctx.newImmediate(int32_t(msg*stride));
+ immType = ir::TYPE_S32;
+ } else {
+ immIndex = ctx.newImmediate(int64_t(msg*stride));
+ immType = ir::TYPE_S64;
+ }
+
+ addr = ctx.reg(pointerFamily);
+ ctx.LOADI(immType, offset, immIndex);
+ ctx.ADD(immType, addr, ptr, offset);
+ }
+
+ // Emit the instruction
+ if (isLoad)
+ ctx.LOAD(type, tuple, addr, addrSpace, perMsgNum, true);
+ else
+ ctx.STORE(type, tuple, addr, addrSpace, perMsgNum, true);
+ }
+ }
+
extern int OCL_SIMD_WIDTH;
template <bool isLoad, typename T>
INLINE void GenWriter::emitLoadOrStore(T &I)
@@ -2811,12 +2869,14 @@ namespace gbe
// count here.
if (elemNum == 4 && regTranslator.isUndefConst(llvmValues, 3))
elemNum = 3;
+
// The code is going to be fairly different from types to types (based on
// size of each vector element)
const ir::Type type = getType(ctx, elemType);
const ir::RegisterFamily pointerFamily = ctx.getPointerFamily();
+ const ir::RegisterFamily dataFamily = getFamily(type);
- if ((type == ir::TYPE_FLOAT || type == ir::TYPE_U32 || type == ir::TYPE_S32) && addrSpace != ir::MEM_CONSTANT) {
+ if(dataFamily == ir::FAMILY_DWORD && addrSpace != ir::MEM_CONSTANT) {
// One message is enough here. Nothing special to do
if (elemNum <= 4) {
// Build the tuple data in the vector
@@ -2842,51 +2902,11 @@ namespace gbe
// Not supported by the hardware. So, we split the message and we use
// strided loads and stores
else {
- // We simply use several uint4 loads
- const uint32_t msgNum = elemNum / 4;
- for (uint32_t msg = 0; msg < msgNum; ++msg) {
- // Build the tuple data in the vector
- vector<ir::Register> tupleData; // put registers here
- for (uint32_t elemID = 0; elemID < 4; ++elemID) {
- ir::Register reg;
- if(regTranslator.isUndefConst(llvmValues, elemID)) {
- Value *v = Constant::getNullValue(elemType);
- reg = this->getRegister(v);
- } else
- reg = this->getRegister(llvmValues, 4*msg+elemID);
-
- tupleData.push_back(reg);
- }
- const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], 4);
-
- // We may need to update to offset the pointer
- ir::Register addr;
- if (msg == 0)
- addr = ptr;
- else {
- const ir::Register offset = ctx.reg(pointerFamily);
- ir::ImmediateIndex immIndex;
- ir::Type immType;
- if (pointerFamily == ir::FAMILY_DWORD) {
- immIndex = ctx.newImmediate(int32_t(msg*sizeof(uint32_t[4])));
- immType = ir::TYPE_S32;
- } else {
- immIndex = ctx.newImmediate(int64_t(msg*sizeof(uint64_t[4])));
- immType = ir::TYPE_S64;
- }
-
- addr = ctx.reg(pointerFamily);
- ctx.LOADI(immType, offset, immIndex);
- ctx.ADD(immType, addr, ptr, offset);
- }
-
- // Emit the instruction
- if (isLoad)
- ctx.LOAD(type, tuple, addr, addrSpace, 4, true);
- else
- ctx.STORE(type, tuple, addr, addrSpace, 4, true);
- }
+ emitBatchLoadOrStore(type, elemNum, llvmValues, ptr, addrSpace, elemType, isLoad);
}
+ }
+ else if((dataFamily==ir::FAMILY_WORD && elemNum%2==0) || (dataFamily == ir::FAMILY_BYTE && elemNum%4 == 0)) {
+ emitBatchLoadOrStore(type, elemNum, llvmValues, ptr, addrSpace, elemType, isLoad);
} else {
for (uint32_t elemID = 0; elemID < elemNum; elemID++) {
if(regTranslator.isUndefConst(llvmValues, elemID))
--
1.7.9.5
More information about the Beignet
mailing list