[Beignet] [PATCH 1/2] GBE: fix insntruction scheduling related bugs in read64/write64.
Zhigang Gong
zhigang.gong at gmail.com
Thu Aug 8 00:15:43 PDT 2013
In read64 and write64, we allocate some temporary registers and
we should put all of those temporary registers may be modified
to the instruction's dst array. Otherwise, the latter post instruction
scheduling may rearrange the instruction incorrectly.
Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
---
backend/src/backend/gen_context.cpp | 13 ++++---
backend/src/backend/gen_encoder.cpp | 6 ---
backend/src/backend/gen_insn_selection.cpp | 57 ++++++++++++++++------------
3 files changed, 39 insertions(+), 37 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 621e7be..6de6cf1 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -601,10 +601,10 @@ namespace gbe
void GenContext::emitRead64Instruction(const SelectionInstruction &insn) {
const uint32_t elemNum = insn.extra.elem;
const uint32_t tmpRegSize = (p->curr.execWidth == 8) ? elemNum * 2 : elemNum;
- const GenRegister dst = ra->genReg(insn.dst(tmpRegSize));
- const GenRegister tmp = ra->genReg(insn.dst(0));
+ const GenRegister tempAddr = ra->genReg(insn.dst(0));
+ const GenRegister dst = ra->genReg(insn.dst(tmpRegSize + 1));
+ const GenRegister tmp = ra->genReg(insn.dst(1));
const GenRegister src = ra->genReg(insn.src(0));
- const GenRegister tempAddr = ra->genReg(insn.src(1));
const uint32_t bti = insn.extra.function;
p->READ64(dst, tmp, tempAddr, src, bti, elemNum);
}
@@ -621,11 +621,12 @@ namespace gbe
// then follow the real destination registers.
// For SIMD16, we allocate elemNum temporary registers from dst(0).
void GenContext::emitWrite64Instruction(const SelectionInstruction &insn) {
- const GenRegister src = ra->genReg(insn.src(0));
+ const GenRegister src = ra->genReg(insn.dst(0));
const uint32_t elemNum = insn.extra.elem;
- const uint32_t tmpRegSize = (p->curr.execWidth == 8) ? elemNum * 2 : elemNum;
- const GenRegister data = ra->genReg(insn.src(tmpRegSize + 1));
+ const GenRegister addr = ra->genReg(insn.src(0)); //tmpRegSize + 1));
+ const GenRegister data = ra->genReg(insn.src(1));
const uint32_t bti = insn.extra.function;
+ p->MOV(src, addr);
p->WRITE64(src, data, bti, elemNum);
}
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 4d6aa34..1a459e1 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -412,12 +412,6 @@ namespace gbe
curr.noMask = originMask;
this->UNTYPED_WRITE(msg, bti, elemNum);
}
- /* Let's restore the original message(addr) register. */
- /* XXX could be optimized if we don't allocate the address to the header
- position of the message. */
- curr.predicate = GEN_PREDICATE_NONE;
- curr.noMask = GEN_MASK_DISABLE;
- ADD(msg, GenRegister::retype(msg, GEN_TYPE_UD), GenRegister::immd(-4));
pop();
}
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 1a3af68..a361ab3 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -479,7 +479,7 @@ namespace gbe
/*! Read 64 bits float/int array */
void READ64(Reg addr, Reg tempAddr, const GenRegister *dst, uint32_t elemNum, uint32_t valueNum, uint32_t bti);
/*! Write 64 bits float/int array */
- void WRITE64(Reg addr, const GenRegister *src, uint32_t elemNum, uint32_t valueNum, uint32_t bti);
+ void WRITE64(Reg addr, const GenRegister *src, uint32_t srcNum, const GenRegister *dst, uint32_t dstNum, uint32_t bti);
/*! Untyped read (up to 4 elements) */
void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
/*! Untyped write (up to 4 elements) */
@@ -825,28 +825,29 @@ namespace gbe
/* elemNum contains all the temporary register and the
real destination registers.*/
void Selection::Opaque::READ64(Reg addr,
- Reg tempAddr,
- const GenRegister *dst,
- uint32_t elemNum,
- uint32_t valueNum,
- uint32_t bti)
+ Reg tempAddr,
+ const GenRegister *dst,
+ uint32_t elemNum,
+ uint32_t valueNum,
+ uint32_t bti)
{
- SelectionInstruction *insn = this->appendInsn(SEL_OP_READ64, elemNum, 2);
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_READ64, elemNum + 1, 1);
SelectionVector *srcVector = this->appendVector();
SelectionVector *dstVector = this->appendVector();
+ /* temporary addr register is to be modified, set it to dst registers.*/
+ insn->dst(0) = tempAddr;
// Regular instruction to encode
for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
- insn->dst(elemID) = dst[elemID];
+ insn->dst(elemID + 1) = dst[elemID];
insn->src(0) = addr;
- insn->src(1) = tempAddr;
insn->extra.function = bti;
insn->extra.elem = valueNum;
// Only the temporary registers need contiguous allocation
dstVector->regNum = elemNum - valueNum;
dstVector->isSrc = 0;
- dstVector->reg = &insn->dst(0);
+ dstVector->reg = &insn->dst(1);
// Source cannot be scalar (yet)
srcVector->regNum = 1;
@@ -883,24 +884,27 @@ namespace gbe
/* elemNum contains all the temporary register and the
real data registers.*/
void Selection::Opaque::WRITE64(Reg addr,
- const GenRegister *src,
- uint32_t elemNum,
- uint32_t valueNum,
- uint32_t bti)
+ const GenRegister *src,
+ uint32_t srcNum,
+ const GenRegister *dst,
+ uint32_t dstNum,
+ uint32_t bti)
{
- SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE64, 0, elemNum+1);
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE64, dstNum, srcNum + 1);
SelectionVector *vector = this->appendVector();
// Regular instruction to encode
insn->src(0) = addr;
- for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
- insn->src(elemID+1) = src[elemID];
+ for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+ insn->src(elemID + 1) = src[elemID];
+ for (uint32_t elemID = 0; elemID < dstNum; ++elemID)
+ insn->dst(elemID) = dst[elemID];
insn->extra.function = bti;
- insn->extra.elem = valueNum;
+ insn->extra.elem = srcNum;
// Only the addr + temporary registers need to be contiguous.
- vector->regNum = (elemNum - valueNum) + 1;
- vector->reg = &insn->src(0);
+ vector->regNum = dstNum;
+ vector->reg = &insn->dst(0);
vector->isSrc = 1;
}
@@ -2085,13 +2089,16 @@ namespace gbe
addr = GenRegister::retype(sel.selReg(insn.getSrc(addrID)), GEN_TYPE_F);
// The first 16 DWORD register space is for temporary usage at encode stage.
uint32_t tmpRegNum = (sel.ctx.getSimdWidth() == 8) ? valueNum * 2 : valueNum;
- GenRegister src[valueNum + tmpRegNum];
+ GenRegister src[valueNum];
+ GenRegister dst[tmpRegNum + 1];
+ /* dst 0 is for the temporary address register. */
+ dst[0] = sel.selReg(sel.reg(FAMILY_DWORD));
for (srcID = 0; srcID < tmpRegNum; ++srcID)
- src[srcID] = sel.selReg(sel.reg(FAMILY_DWORD));
+ dst[srcID + 1] = sel.selReg(sel.reg(FAMILY_DWORD));
- for (uint32_t valueID = 0; valueID < valueNum; ++srcID, ++valueID)
- src[srcID] = sel.selReg(insn.getValue(valueID));
- sel.WRITE64(addr, src, valueNum + tmpRegNum, valueNum, bti);
+ for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
+ src[valueID] = sel.selReg(insn.getValue(valueID));
+ sel.WRITE64(addr, src, valueNum, dst, tmpRegNum + 1, bti);
}
void emitByteScatter(Selection::Opaque &sel,
--
1.7.9.5
More information about the Beignet
mailing list