[Beignet] [PATCH] generate MOV instruction at selection stage when doing simd_shuffle with an immediate value.
Guo Yejun
yejun.guo at intel.com
Thu Aug 27 16:05:56 PDT 2015
The earlier the instruction is generated, the more optimizations
can potentially be applied to it.
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
backend/src/backend/gen8_context.cpp | 22 ++++++--------
backend/src/backend/gen_context.cpp | 46 +++++++++++++-----------------
backend/src/backend/gen_insn_selection.cpp | 10 +++++--
backend/src/backend/gen_register.hpp | 8 ++++++
4 files changed, 44 insertions(+), 42 deletions(-)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index b497ee5..f02786c 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -259,20 +259,14 @@ namespace gbe
const GenRegister src0 = ra->genReg(insn.src(0));
const GenRegister src1 = ra->genReg(insn.src(1));
assert(insn.opcode == SEL_OP_SIMD_SHUFFLE);
-
- uint32_t simd = p->curr.execWidth;
- if (src1.file == GEN_IMMEDIATE_VALUE) {
- uint32_t offset = src1.value.ud % simd;
- GenRegister reg = GenRegister::suboffset(src0, offset);
- p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type));
- } else {
- uint32_t base = src0.nr * 32 + src0.subnr * 4;
- GenRegister baseReg = GenRegister::immuw(base);
- const GenRegister a0 = GenRegister::addr8(0);
- p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
- GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
- p->MOV(dst, indirect);
- }
+ assert (src1.file != GEN_IMMEDIATE_VALUE);
+
+ uint32_t base = src0.nr * 32 + src0.subnr * 4;
+ GenRegister baseReg = GenRegister::immuw(base);
+ const GenRegister a0 = GenRegister::addr8(0);
+ p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+ GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+ p->MOV(dst, indirect);
}
void Gen8Context::emitBinaryInstruction(const SelectionInstruction &insn) {
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 25fdf08..c2be7aa 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -585,35 +585,29 @@ namespace gbe
const GenRegister src0 = ra->genReg(insn.src(0));
const GenRegister src1 = ra->genReg(insn.src(1));
assert(insn.opcode == SEL_OP_SIMD_SHUFFLE);
+ assert (src1.file != GEN_IMMEDIATE_VALUE);
+ uint32_t base = src0.nr * 32 + src0.subnr * 4;
+ GenRegister baseReg = GenRegister::immuw(base);
+ const GenRegister a0 = GenRegister::addr8(0);
uint32_t simd = p->curr.execWidth;
- if (src1.file == GEN_IMMEDIATE_VALUE) {
- uint32_t offset = src1.value.ud % simd;
- GenRegister reg = GenRegister::suboffset(src0, offset);
- p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type));
- } else {
- uint32_t base = src0.nr * 32 + src0.subnr * 4;
- GenRegister baseReg = GenRegister::immuw(base);
- const GenRegister a0 = GenRegister::addr8(0);
+ p->push();
+ if (simd == 8) {
+ p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+ GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+ p->MOV(dst, indirect);
+ } else if (simd == 16) {
+ p->curr.execWidth = 8;
+ p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+ GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+ p->MOV(dst, indirect);
- p->push();
- if (simd == 8) {
- p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
- GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
- p->MOV(dst, indirect);
- } else if (simd == 16) {
- p->curr.execWidth = 8;
- p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
- GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
- p->MOV(dst, indirect);
-
- p->curr.quarterControl = 1;
- p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
- p->MOV(GenRegister::offset(dst, 1, 0), indirect);
- } else
- NOT_IMPLEMENTED;
- p->pop();
- }
+ p->curr.quarterControl = 1;
+ p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+ p->MOV(GenRegister::offset(dst, 1, 0), indirect);
+ } else
+ NOT_IMPLEMENTED;
+ p->pop();
}
void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index ab00269..88d6dcc 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -5033,8 +5033,14 @@ namespace gbe
sel.curr.predicate = GEN_PREDICATE_NONE;
sel.curr.noMask = 1;
}
- if (src1.file == GEN_IMMEDIATE_VALUE)
- sel.SIMD_SHUFFLE(dst, src0, src1);
+ if (src1.file == GEN_IMMEDIATE_VALUE) {
+ uint32_t offset = src1.value.ud % sel.curr.execWidth;
+ GenRegister reg = GenRegister::subphysicaloffset(src0, offset);
+ reg.vstride = GEN_VERTICAL_STRIDE_0;
+ reg.hstride = GEN_HORIZONTAL_STRIDE_0;
+ reg.width = GEN_WIDTH_1;
+ sel.MOV(dst, reg);
+ }
else {
GenRegister shiftL = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
sel.SHL(shiftL, src1, GenRegister::immud(0x2));
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 4f37e30..a63d693 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -946,6 +946,14 @@ namespace gbe
return reg;
}
+ static INLINE GenRegister subphysicaloffset(GenRegister reg, uint32_t delta) {
+ if (reg.hstride != GEN_HORIZONTAL_STRIDE_0) {
+ reg.subnr += delta * typeSize(reg.type) * hstride_size(reg);
+ reg.subphysical = 1;
+ }
+ return reg;
+ }
+
static INLINE GenRegister df16(uint32_t file, uint32_t nr, uint32_t subnr) {
return retype(vec16(file, nr, subnr), GEN_TYPE_DF);
}
--
1.9.1
More information about the Beignet
mailing list