[Beignet] [PATCH V3] Use a separate pattern for simd shuffle instead of binary pattern
Guo Yejun
yejun.guo at intel.com
Fri Jul 17 00:49:44 PDT 2015
The binary pattern assumes that both source operands have the same type,
which is not the case for simd shuffle, so add a separate pattern for it.
v2: use a different way to obtain imm data
do not use GenRegister::udxgrf, use sel.selReg instead
add SimdShuffleInstruction::wellFormed
v3: refine SimdShuffleInstruction::wellFormed
set dag.child[0]->isRoot = 1
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
backend/src/backend/gen8_context.cpp | 38 ++++++------
backend/src/backend/gen8_context.hpp | 1 +
backend/src/backend/gen_context.cpp | 68 ++++++++++++----------
backend/src/backend/gen_context.hpp | 1 +
.../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
backend/src/backend/gen_insn_selection.cpp | 63 ++++++++++++++++----
backend/src/backend/gen_insn_selection.hxx | 2 +-
backend/src/ir/instruction.cpp | 38 +++++++++++-
backend/src/ir/instruction.hpp | 8 +++
backend/src/ir/instruction.hxx | 2 +-
10 files changed, 158 insertions(+), 64 deletions(-)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 66bb54a..b497ee5 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -254,6 +254,27 @@ namespace gbe
}
}
+ void Gen8Context::emitSimdShuffleInstruction(const SelectionInstruction &insn) { // Gen8 emitter for SEL_OP_SIMD_SHUFFLE; src1 selects what is read from src0 into dst
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ const GenRegister src0 = ra->genReg(insn.src(0)); // register the value is read from
+ const GenRegister src1 = ra->genReg(insn.src(1)); // index operand: an immediate or a register
+ assert(insn.opcode == SEL_OP_SIMD_SHUFFLE);
+ // Immediate index -> direct MOV from a sub-offset of src0; register index -> MOV through the a0 address register.
+ uint32_t simd = p->curr.execWidth;
+ if (src1.file == GEN_IMMEDIATE_VALUE) {
+ uint32_t offset = src1.value.ud % simd; // wrap the immediate index into [0, simd)
+ GenRegister reg = GenRegister::suboffset(src0, offset);
+ p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type)); // rebuilt via ud1grf, then retyped back to reg.type; presumably a 1-wide region -- confirm
+ } else {
+ uint32_t base = src0.nr * 32 + src0.subnr * 4; // NOTE(review): looks like an address of src0 assuming 32 bytes per register -- confirm the unit of subnr
+ GenRegister baseReg = GenRegister::immuw(base);
+ const GenRegister a0 = GenRegister::addr8(0);
+ p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg); // a0 = src1 (read as unpacked UW) + base
+ GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+ p->MOV(dst, indirect); // unlike GenContext::emitSimdShuffleInstruction, no simd == 8 / simd == 16 split is done here
+ }
+ }
+
void Gen8Context::emitBinaryInstruction(const SelectionInstruction &insn) {
const GenRegister dst = ra->genReg(insn.dst(0));
const GenRegister src0 = ra->genReg(insn.src(0));
@@ -273,23 +294,6 @@ namespace gbe
p->ADD(dst, dst, src1);
break;
}
- case SEL_OP_SIMD_SHUFFLE:
- {
- uint32_t simd = p->curr.execWidth;
- if (src1.file == GEN_IMMEDIATE_VALUE) {
- uint32_t offset = src1.value.ud % simd;
- GenRegister reg = GenRegister::suboffset(src0, offset);
- p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type));
- } else {
- uint32_t base = src0.nr * 32 + src0.subnr * 4;
- GenRegister baseReg = GenRegister::immuw(base);
- const GenRegister a0 = GenRegister::addr8(0);
- p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
- GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
- p->MOV(dst, indirect);
- }
- break;
- }
default:
GenContext::emitBinaryInstruction(insn);
}
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 4f164ce..84508e9 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -52,6 +52,7 @@ namespace gbe
virtual void emitUnaryInstruction(const SelectionInstruction &insn);
virtual void emitUnaryWithTempInstruction(const SelectionInstruction &insn);
+ virtual void emitSimdShuffleInstruction(const SelectionInstruction &insn);
virtual void emitBinaryInstruction(const SelectionInstruction &insn);
virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
virtual void emitI64MULHIInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index db27377..e16b0a9 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -545,6 +545,42 @@ namespace gbe
}
}
+ void GenContext::emitSimdShuffleInstruction(const SelectionInstruction &insn) { // emitter for SEL_OP_SIMD_SHUFFLE; src1 selects what is read from src0 into dst
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ const GenRegister src0 = ra->genReg(insn.src(0)); // register the value is read from
+ const GenRegister src1 = ra->genReg(insn.src(1)); // index operand: an immediate or a register
+ assert(insn.opcode == SEL_OP_SIMD_SHUFFLE);
+ // Immediate index -> direct MOV from a sub-offset of src0; register index -> MOV through the a0 address register.
+ uint32_t simd = p->curr.execWidth;
+ if (src1.file == GEN_IMMEDIATE_VALUE) {
+ uint32_t offset = src1.value.ud % simd; // wrap the immediate index into [0, simd)
+ GenRegister reg = GenRegister::suboffset(src0, offset);
+ p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type)); // rebuilt via ud1grf, then retyped back to reg.type; presumably a 1-wide region -- confirm
+ } else {
+ uint32_t base = src0.nr * 32 + src0.subnr * 4; // NOTE(review): looks like an address of src0 assuming 32 bytes per register -- confirm the unit of subnr
+ GenRegister baseReg = GenRegister::immuw(base);
+ const GenRegister a0 = GenRegister::addr8(0);
+ // Only execution widths 8 and 16 are handled below; any other width hits NOT_IMPLEMENTED.
+ p->push(); // paired with p->pop() below; presumably saves p->curr, which the simd == 16 path modifies -- confirm
+ if (simd == 8) {
+ p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg); // a0 = src1 (read as unpacked UW) + base
+ GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+ p->MOV(dst, indirect);
+ } else if (simd == 16) {
+ p->curr.execWidth = 8; // emitted as two 8-wide ADD/MOV pairs
+ p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg); // first pair: indices taken from register src1.nr
+ GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+ p->MOV(dst, indirect);
+ // Second pair: quarterControl = 1, indices from register src1.nr+1, result written to dst offset by (1, 0).
+ p->curr.quarterControl = 1;
+ p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+ p->MOV(GenRegister::offset(dst, 1, 0), indirect); // reuses the same indirect operand; a0 was rewritten by the ADD just above
+ } else
+ NOT_IMPLEMENTED;
+ p->pop();
+ }
+ }
+
void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
const GenRegister dst = ra->genReg(insn.dst(0));
const GenRegister src0 = ra->genReg(insn.src(0));
@@ -595,38 +631,6 @@ namespace gbe
p->MOV(xdst.bottom_half(), xsrc1.bottom_half());
}
break;
- case SEL_OP_SIMD_SHUFFLE:
- {
- uint32_t simd = p->curr.execWidth;
- if (src1.file == GEN_IMMEDIATE_VALUE) {
- uint32_t offset = src1.value.ud % simd;
- GenRegister reg = GenRegister::suboffset(src0, offset);
- p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type));
- } else {
- uint32_t base = src0.nr * 32 + src0.subnr * 4;
- GenRegister baseReg = GenRegister::immuw(base);
- const GenRegister a0 = GenRegister::addr8(0);
-
- p->push();
- if (simd == 8) {
- p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
- GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
- p->MOV(dst, indirect);
- } else if (simd == 16) {
- p->curr.execWidth = 8;
- p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
- GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
- p->MOV(dst, indirect);
-
- p->curr.quarterControl = 1;
- p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
- p->MOV(GenRegister::offset(dst, 1, 0), indirect);
- } else
- NOT_IMPLEMENTED;
- p->pop();
- }
- }
- break;
default: NOT_IMPLEMENTED;
}
}
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index d387387..69fe513 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -127,6 +127,7 @@ namespace gbe
virtual void emitUnaryInstruction(const SelectionInstruction &insn);
virtual void emitUnaryWithTempInstruction(const SelectionInstruction &insn);
virtual void emitBinaryInstruction(const SelectionInstruction &insn);
+ virtual void emitSimdShuffleInstruction(const SelectionInstruction &insn);
virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
void emitTernaryInstruction(const SelectionInstruction &insn);
virtual void emitI64MULHIInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index d054820..d073770 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -3,6 +3,7 @@ DECL_GEN7_SCHEDULE(Label, 0, 0, 0)
DECL_GEN7_SCHEDULE(Unary, 20, 4, 2)
DECL_GEN7_SCHEDULE(UnaryWithTemp, 20, 40, 20)
DECL_GEN7_SCHEDULE(Binary, 20, 4, 2)
+DECL_GEN7_SCHEDULE(SimdShuffle, 20, 4, 2)
DECL_GEN7_SCHEDULE(BinaryWithTemp, 20, 40, 20)
DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64Shift, 20, 40, 20)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index af5ab9c..b0ba9e3 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -543,7 +543,6 @@ namespace gbe
ALU1(RNDD)
ALU1(RNDU)
ALU2(MACH)
- ALU2(SIMD_SHUFFLE)
ALU1(LZD)
ALU3(MAD)
ALU2WithTemp(MUL_HI)
@@ -565,6 +564,8 @@ namespace gbe
#undef ALU2WithTemp
#undef ALU3
#undef I64Shift
+ /*! Append a SEL_OP_SIMD_SHUFFLE selection instruction (one destination, two sources) */
+ void SIMD_SHUFFLE(Reg dst, Reg src0, Reg src1);
/*! Convert 64-bit integer to 32-bit float */
void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[6]);
/*! Convert 64-bit integer to 32-bit float */
@@ -1652,6 +1653,14 @@ namespace gbe
insn->src(2) = src2;
}
+ void Selection::Opaque::SIMD_SHUFFLE(Reg dst, Reg src0, Reg src1) // appends one SEL_OP_SIMD_SHUFFLE selection instruction and fills in its operands
+ {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_SIMD_SHUFFLE, 1, 2); // presumably (opcode, dst count, src count): one dst and two srcs are set below
+ insn->dst(0) = dst;
+ insn->src(0) = src0; // register the value is read from
+ insn->src(1) = src1; // index operand (immediate or register), passed through unchanged
+ }
+
void Selection::Opaque::I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]) {
SelectionInstruction *insn = this->appendInsn(SEL_OP_I64CMP, 3, 2);
insn->src(0) = src0;
@@ -2815,17 +2824,6 @@ namespace gbe
case OP_UPSAMPLE_LONG:
sel.UPSAMPLE_LONG(dst, src0, src1);
break;
- case OP_SIMD_SHUFFLE:
- {
- if (src1.file == GEN_IMMEDIATE_VALUE)
- sel.SIMD_SHUFFLE(dst, src0, src1);
- else {
- GenRegister shiftL = GenRegister::udxgrf(sel.curr.execWidth, sel.reg(FAMILY_DWORD));
- sel.SHL(shiftL, src1, GenRegister::immud(0x2));
- sel.SIMD_SHUFFLE(dst, src0, shiftL);
- }
- }
- break;
default: NOT_IMPLEMENTED;
}
sel.pop();
@@ -4973,6 +4971,46 @@ namespace gbe
}
};
+ class SimdShuffleInstructionPattern : public SelectionPattern // selection pattern matching ir::OP_SIMD_SHUFFLE only
+ {
+ public:
+ SimdShuffleInstructionPattern(void) : SelectionPattern(1,1) {
+ this->opcodes.push_back(ir::OP_SIMD_SHUFFLE);
+ }
+ INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { // always returns true
+ using namespace ir;
+ const ir::SimdShuffleInstruction &insn = cast<SimdShuffleInstruction>(dag.insn);
+ assert(insn.getOpcode() == OP_SIMD_SHUFFLE);
+ const Type type = insn.getType();
+ GenRegister dst = sel.selReg(insn.getDst(0), type);
+ GenRegister src0 = sel.selReg(insn.getSrc(0), type); // dst and src0 use the instruction type
+ GenRegister src1; // index operand; always built as TYPE_U32 below, independent of the instruction type
+ // If the index comes from an OP_LOADI child that can be expressed as an immediate, use the immediate directly.
+ SelectionDAG *dag0 = dag.child[0];
+ SelectionDAG *dag1 = dag.child[1];
+ if (dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag1->insn)) {
+ const auto &childInsn = cast<LoadImmInstruction>(dag1->insn);
+ src1 = getRegisterFromImmediate(childInsn.getImmediate(), TYPE_U32);
+ if (dag0) dag0->isRoot = 1; // only child[0] is flagged as root on this path; child[1] is not
+ } else {
+ markAllChildren(dag);
+ src1 = sel.selReg(insn.getSrc(1), TYPE_U32);
+ }
+ // Immediate index is passed through as-is; a register index is first shifted left by 2 into a fresh DWORD register.
+ sel.push();
+ if (src1.file == GEN_IMMEDIATE_VALUE)
+ sel.SIMD_SHUFFLE(dst, src0, src1);
+ else {
+ GenRegister shiftL = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
+ sel.SHL(shiftL, src1, GenRegister::immud(0x2)); // index * 4; presumably converts an element index to a byte offset for 4-byte elements -- confirm
+ sel.SIMD_SHUFFLE(dst, src0, shiftL);
+ }
+ sel.pop();
+ return true;
+ }
+
+ };
+
/*! Get a region of a register */
class RegionInstructionPattern : public SelectionPattern
{
@@ -5247,6 +5285,7 @@ namespace gbe
this->insert<GetImageInfoInstructionPattern>();
this->insert<ReadARFInstructionPattern>();
this->insert<RegionInstructionPattern>();
+ this->insert<SimdShuffleInstructionPattern>();
this->insert<IndirectMovInstructionPattern>();
this->insert<NullaryInstructionPattern>();
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 79f2ce1..adbb137 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -26,7 +26,7 @@ DECL_SELECTION_IR(SHL, BinaryInstruction)
DECL_SELECTION_IR(RSR, BinaryInstruction)
DECL_SELECTION_IR(RSL, BinaryInstruction)
DECL_SELECTION_IR(ASR, BinaryInstruction)
-DECL_SELECTION_IR(SIMD_SHUFFLE, BinaryInstruction)
+DECL_SELECTION_IR(SIMD_SHUFFLE, SimdShuffleInstruction)
DECL_SELECTION_IR(I64SHR, I64ShiftInstruction)
DECL_SELECTION_IR(I64SHL, I64ShiftInstruction)
DECL_SELECTION_IR(I64ASR, I64ShiftInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 12d70a6..f93c528 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -741,6 +741,22 @@ namespace ir {
Register src[0];
};
+ class ALIGNED_INSTRUCTION SimdShuffleInstruction : public NaryInstruction<2> // internal IR node for OP_SIMD_SHUFFLE, built on the two-source NaryInstruction base
+ {
+ public:
+ SimdShuffleInstruction(Type type, // stored in this->type
+ Register dst, // stored in dst[0]
+ Register src0, // stored in src[0]
+ Register src1) { // stored in src[1]; wellFormed checks this one against FAMILY_DWORD
+ this->opcode = OP_SIMD_SHUFFLE;
+ this->type = type;
+ this->dst[0] = dst;
+ this->src[0] = src0;
+ this->src[1] = src1;
+ }
+ INLINE bool wellFormed(const Function &fn, std::string &why) const; // defined out of line
+ };
+
class ALIGNED_INSTRUCTION RegionInstruction :
public BasePolicy,
public NSrcPolicy<RegionInstruction, 1>,
@@ -1154,6 +1170,19 @@ namespace ir {
return true;
}
+ INLINE bool SimdShuffleInstruction::wellFormed(const Function &fn, std::string &whyNot) const // returns true when the instruction passes the two checks below
+ {
+ if (UNLIKELY( this->type != TYPE_U32 && this->type != TYPE_S32 && this->type != TYPE_FLOAT)) { // only U32, S32 and FLOAT instruction types are accepted
+ whyNot = "Only support S32/U32/FLOAT type";
+ return false;
+ }
+ // src[1] must pass checkRegisterData for FAMILY_DWORD. NOTE(review): src[0] and dst[0] are not checked here.
+ if (UNLIKELY(checkRegisterData(FAMILY_DWORD, src[1], fn, whyNot) == false)) // presumably the helper fills whyNot on failure -- confirm
+ return false;
+
+ return true;
+ }
+
INLINE bool RegionInstruction::wellFormed(const Function &fn, std::string &whyNot) const
{
if (UNLIKELY(checkRegisterData(FAMILY_DWORD, src[0], fn, whyNot) == false))
@@ -1461,6 +1490,10 @@ START_INTROSPECTION(RegionInstruction)
#include "ir/instruction.hxx"
END_INTROSPECTION(RegionInstruction)
+START_INTROSPECTION(SimdShuffleInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(SimdShuffleInstruction)
+
START_INTROSPECTION(IndirectMovInstruction)
#include "ir/instruction.hxx"
END_INTROSPECTION(IndirectMovInstruction)
@@ -1652,6 +1685,7 @@ DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters())
DECL_MEM_FN(ReadARFInstruction, Type, getType(void), getType())
DECL_MEM_FN(ReadARFInstruction, ARFRegister, getARFRegister(void), getARFRegister())
+DECL_MEM_FN(SimdShuffleInstruction, Type, getType(void), getType())
DECL_MEM_FN(RegionInstruction, uint32_t, getOffset(void), getOffset())
DECL_MEM_FN(IndirectMovInstruction, uint32_t, getOffset(void), getOffset())
DECL_MEM_FN(IndirectMovInstruction, Type, getType(void), getType())
@@ -1751,7 +1785,6 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
DECL_EMIT_FUNCTION(RHADD)
DECL_EMIT_FUNCTION(I64HADD)
DECL_EMIT_FUNCTION(I64RHADD)
- DECL_EMIT_FUNCTION(SIMD_SHUFFLE)
#undef DECL_EMIT_FUNCTION
@@ -1881,6 +1914,9 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
Instruction REGION(Register dst, Register src, uint32_t offset) {
return internal::RegionInstruction(dst, src, offset).convert();
}
+ Instruction SIMD_SHUFFLE(Type type, Register dst, Register src0, Register src1) { // factory: builds an internal::SimdShuffleInstruction and returns it via convert()
+ return internal::SimdShuffleInstruction(type, dst, src0, src1).convert();
+ }
Instruction INDIRECT_MOV(Type type, Register dst, Register src0, Register src1, uint32_t offset) {
return internal::IndirectMovInstruction(type, dst, src0, src1, offset).convert();
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index ec4d00d..cf8d839 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -522,6 +522,14 @@ namespace ir {
static bool isClassOf(const Instruction &insn);
};
+ /*! SIMD shuffle instruction (opcode SIMD_SHUFFLE): one destination, a source and an index source */
+ class SimdShuffleInstruction : public Instruction {
+ public:
+ Type getType(void) const; // type the instruction was created with
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
/*! return a region of a register, make sure the offset does not exceed the register size */
class RegionInstruction : public Instruction {
public:
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 1001837..81548c9 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -59,7 +59,7 @@ DECL_INSN(BSB, BinaryInstruction)
DECL_INSN(OR, BinaryInstruction)
DECL_INSN(XOR, BinaryInstruction)
DECL_INSN(AND, BinaryInstruction)
-DECL_INSN(SIMD_SHUFFLE, BinaryInstruction)
+DECL_INSN(SIMD_SHUFFLE, SimdShuffleInstruction)
DECL_INSN(SEL, SelectInstruction)
DECL_INSN(EQ, CompareInstruction)
DECL_INSN(NE, CompareInstruction)
--
1.9.1
More information about the Beignet
mailing list