[Beignet] [PATCH 2/2] Backend: Refine image block read with less vector and dst tmp
Xiuli Pan
xiuli.pan at intel.com
Mon Aug 22 06:03:04 UTC 2016
From: Pan Xiuli <xiuli.pan at intel.com>
Image block read in simd16 mode needs to be split into 2 simd8 send messages;
refine the code to share the tmp vector regs between the two halves.
Also fix some wrong tmp regs in simd8 mode.
Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
backend/src/backend/gen_context.cpp | 30 ++++++++++++-----------------
backend/src/backend/gen_insn_selection.cpp | 31 ++++++++++++++++--------------
2 files changed, 29 insertions(+), 32 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index f1c58eb..4b0b0cd 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -3662,18 +3662,13 @@ namespace gbe
}
void GenContext::emitMBReadInstruction(const SelectionInstruction &insn) {
- const GenRegister dst = ra->genReg(insn.dst(0));
+ const GenRegister dst = ra->genReg(insn.dst(1));
const GenRegister coordx = GenRegister::toUniform(ra->genReg(insn.src(0)),GEN_TYPE_D);
const GenRegister coordy = GenRegister::toUniform(ra->genReg(insn.src(1)),GEN_TYPE_D);
- GenRegister header, offsetx, offsety, blocksizereg;
- if (simdWidth == 8)
- header = GenRegister::retype(ra->genReg(insn.dst(0)), GEN_TYPE_UD);
- else
- header = GenRegister::retype(GenRegister::Qn(ra->genReg(insn.src(2)),1), GEN_TYPE_UD);
-
- offsetx = GenRegister::offset(header, 0, 0*4);
- offsety = GenRegister::offset(header, 0, 1*4);
- blocksizereg = GenRegister::offset(header, 0, 2*4);
+ const GenRegister header = GenRegister::retype(ra->genReg(insn.dst(0)), GEN_TYPE_UD);
+ const GenRegister offsetx = GenRegister::offset(header, 0, 0*4);
+ const GenRegister offsety = GenRegister::offset(header, 0, 1*4);
+ const GenRegister blocksizereg = GenRegister::offset(header, 0, 2*4);
size_t vec_size = insn.extra.elem;
uint32_t blocksize = 0x1F | (vec_size-1) << 16;
@@ -3700,7 +3695,7 @@ namespace gbe
}
else if (simdWidth == 16)
{
- const GenRegister tmp = ra->genReg(insn.dst(vec_size));
+ const GenRegister tmp = GenRegister::retype(ra->genReg(insn.dst(vec_size + 1)), GEN_TYPE_UD);
p->push();
// Copy r0 into the header first
p->curr.execWidth = 8;
@@ -3718,23 +3713,22 @@ namespace gbe
// Now read the data
p->curr.execWidth = 8;
p->MBREAD(tmp, header, insn.getbti(), vec_size);
+ for (uint32_t i = 0; i < vec_size; i++)
+ p->MOV(ra->genReg(insn.dst(i + 1)), GenRegister::offset(tmp, i));
// Second half
// Update the header with the coord
p->curr.execWidth = 1;
p->ADD(offsetx, offsetx, GenRegister::immud(32));
- const GenRegister tmp2 = GenRegister::offset(tmp, vec_size);
// Now read the data
p->curr.execWidth = 8;
- p->MBREAD(tmp2, header, insn.getbti(), vec_size);
+ p->MBREAD(tmp, header, insn.getbti(), vec_size);
// Move the reg to fit vector rule.
- for (uint32_t i = 0; i < vec_size; i++) {
- p->MOV(GenRegister::offset(dst, i * 2), GenRegister::offset(tmp, i));
- p->MOV(GenRegister::offset(dst, i * 2 + 1),
- GenRegister::offset(tmp2, i));
- }
+ for (uint32_t i = 0; i < vec_size; i++)
+ p->MOV(GenRegister::offset(ra->genReg(insn.dst(i + 1)), 1),
+ GenRegister::offset(tmp, i));
p->pop();
} else NOT_IMPLEMENTED;
}
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index c4e2934..3b21fb5 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2086,30 +2086,33 @@ namespace gbe
uint32_t vec_size) {
uint32_t simdWidth = curr.execWidth;
- SelectionInstruction *insn = this->appendInsn(SEL_OP_MBREAD, vec_size * simdWidth / 8, 3);
- SelectionVector *vector = this->appendVector();
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_MBREAD, vec_size * simdWidth / 8 + 1, 2);
+
+ insn->dst(0) = header;
for (uint32_t i = 0; i < vec_size; ++i) {
- insn->dst(i) = dsts[i];
+ insn->dst(i + 1) = dsts[i];
if(simdWidth == 16)
- insn->dst(i + vec_size) = tmp[i];
+ insn->dst(i + vec_size + 1) = tmp[i];
}
insn->src(0) = coordx;
insn->src(1) = coordy;
- insn->src(2) = header;
insn->setbti(bti);
insn->extra.elem = vec_size; // vector size
- vector->regNum = vec_size;
- vector->reg = &insn->dst(0);
- vector->offsetID = 0;
- vector->isSrc = 0;
-
+ // Only in simd 8 the data is in vector form
+ if(simdWidth == 8) {
+ SelectionVector *vector = this->appendVector();
+ vector->regNum = vec_size;
+ vector->reg = &insn->dst(1);
+ vector->offsetID = 1;
+ vector->isSrc = 0;
+ }
if(simdWidth == 16)
{
SelectionVector *vectortmp = this->appendVector();
vectortmp->regNum = vec_size;
- vectortmp->reg = &insn->dst(vec_size);
- vectortmp->offsetID = vec_size;
+ vectortmp->reg = &insn->dst(vec_size + 1);
+ vectortmp->offsetID = vec_size + 1;
vectortmp->isSrc = 0;
}
}
@@ -6708,11 +6711,11 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
for (uint32_t i = 0; i < vec_size; ++i) {
valuesVec.push_back(sel.selReg(insn.getDst(i), TYPE_U32));
if(simdWidth == 16)
- tmpVec.push_back(sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32));
+ tmpVec.push_back(GenRegister::retype(GenRegister::f8grf(sel.reg(FAMILY_DWORD)), TYPE_U32));
}
const GenRegister coordx = sel.selReg(insn.getSrc(0), TYPE_U32);
const GenRegister coordy = sel.selReg(insn.getSrc(1), TYPE_U32);
- const GenRegister header = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
+ const GenRegister header = GenRegister::retype(GenRegister::f8grf(sel.reg(FAMILY_DWORD)), TYPE_U32);
GenRegister *tmp = NULL;
if(simdWidth == 16)
tmp = &tmpVec[0];
--
2.7.4
More information about the Beignet
mailing list