[Beignet] [PATCH V3] Use a separate pattern for simd shuffle instead of binary pattern
Song, Ruiling
ruiling.song at intel.com
Fri Jul 17 00:43:31 PDT 2015
LGTM
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo Yejun
> Sent: Friday, July 17, 2015 3:50 PM
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun
> Subject: [Beignet] [PATCH V3] Use a separate pattern for simd shuffle instead of
> binary pattern
>
> The binary pattern assumes that both source operands have the same type, which
> is not the case for simd shuffle, so add a separate pattern for it.
>
> v2: use a different way to obtain imm data
> do not use GenRegister::udxgrf, use sel.selReg instead
> add SimdShuffleInstruction::wellFormed
> v3: refine SimdShuffleInstruction::wellFormed
> set dag.child[0]->isRoot = 1
>
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
> backend/src/backend/gen8_context.cpp | 38 ++++++------
> backend/src/backend/gen8_context.hpp | 1 +
> backend/src/backend/gen_context.cpp | 68 ++++++++++++----------
> backend/src/backend/gen_context.hpp | 1 +
> .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
> backend/src/backend/gen_insn_selection.cpp | 63 ++++++++++++++++----
> backend/src/backend/gen_insn_selection.hxx | 2 +-
> backend/src/ir/instruction.cpp | 38 +++++++++++-
> backend/src/ir/instruction.hpp | 8 +++
> backend/src/ir/instruction.hxx | 2 +-
> 10 files changed, 158 insertions(+), 64 deletions(-)
>
> diff --git a/backend/src/backend/gen8_context.cpp
> b/backend/src/backend/gen8_context.cpp
> index 66bb54a..b497ee5 100644
> --- a/backend/src/backend/gen8_context.cpp
> +++ b/backend/src/backend/gen8_context.cpp
> @@ -254,6 +254,27 @@ namespace gbe
> }
> }
>
> + void Gen8Context::emitSimdShuffleInstruction(const SelectionInstruction
> &insn) {
> + const GenRegister dst = ra->genReg(insn.dst(0));
> + const GenRegister src0 = ra->genReg(insn.src(0));
> + const GenRegister src1 = ra->genReg(insn.src(1));
> + assert(insn.opcode == SEL_OP_SIMD_SHUFFLE);
> +
> + uint32_t simd = p->curr.execWidth;
> + if (src1.file == GEN_IMMEDIATE_VALUE) {
> + uint32_t offset = src1.value.ud % simd;
> + GenRegister reg = GenRegister::suboffset(src0, offset);
> + p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr /
> typeSize(reg.type)), reg.type));
> + } else {
> + uint32_t base = src0.nr * 32 + src0.subnr * 4;
> + GenRegister baseReg = GenRegister::immuw(base);
> + const GenRegister a0 = GenRegister::addr8(0);
> + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr /
> typeSize(GEN_TYPE_UW)), baseReg);
> + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
> + p->MOV(dst, indirect);
> + }
> + }
> +
> void Gen8Context::emitBinaryInstruction(const SelectionInstruction &insn) {
> const GenRegister dst = ra->genReg(insn.dst(0));
> const GenRegister src0 = ra->genReg(insn.src(0)); @@ -273,23 +294,6 @@
> namespace gbe
> p->ADD(dst, dst, src1);
> break;
> }
> - case SEL_OP_SIMD_SHUFFLE:
> - {
> - uint32_t simd = p->curr.execWidth;
> - if (src1.file == GEN_IMMEDIATE_VALUE) {
> - uint32_t offset = src1.value.ud % simd;
> - GenRegister reg = GenRegister::suboffset(src0, offset);
> - p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr /
> typeSize(reg.type)), reg.type));
> - } else {
> - uint32_t base = src0.nr * 32 + src0.subnr * 4;
> - GenRegister baseReg = GenRegister::immuw(base);
> - const GenRegister a0 = GenRegister::addr8(0);
> - p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr /
> typeSize(GEN_TYPE_UW)), baseReg);
> - GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
> - p->MOV(dst, indirect);
> - }
> - break;
> - }
> default:
> GenContext::emitBinaryInstruction(insn);
> }
> diff --git a/backend/src/backend/gen8_context.hpp
> b/backend/src/backend/gen8_context.hpp
> index 4f164ce..84508e9 100644
> --- a/backend/src/backend/gen8_context.hpp
> +++ b/backend/src/backend/gen8_context.hpp
> @@ -52,6 +52,7 @@ namespace gbe
>
> virtual void emitUnaryInstruction(const SelectionInstruction &insn);
> virtual void emitUnaryWithTempInstruction(const SelectionInstruction &insn);
> + virtual void emitSimdShuffleInstruction(const SelectionInstruction
> + &insn);
> virtual void emitBinaryInstruction(const SelectionInstruction &insn);
> virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
> virtual void emitI64MULHIInstruction(const SelectionInstruction &insn); diff --
> git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index db27377..e16b0a9 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -545,6 +545,42 @@ namespace gbe
> }
> }
>
> + void GenContext::emitSimdShuffleInstruction(const SelectionInstruction &insn)
> {
> + const GenRegister dst = ra->genReg(insn.dst(0));
> + const GenRegister src0 = ra->genReg(insn.src(0));
> + const GenRegister src1 = ra->genReg(insn.src(1));
> + assert(insn.opcode == SEL_OP_SIMD_SHUFFLE);
> +
> + uint32_t simd = p->curr.execWidth;
> + if (src1.file == GEN_IMMEDIATE_VALUE) {
> + uint32_t offset = src1.value.ud % simd;
> + GenRegister reg = GenRegister::suboffset(src0, offset);
> + p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr /
> typeSize(reg.type)), reg.type));
> + } else {
> + uint32_t base = src0.nr * 32 + src0.subnr * 4;
> + GenRegister baseReg = GenRegister::immuw(base);
> + const GenRegister a0 = GenRegister::addr8(0);
> +
> + p->push();
> + if (simd == 8) {
> + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr /
> typeSize(GEN_TYPE_UW)), baseReg);
> + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
> + p->MOV(dst, indirect);
> + } else if (simd == 16) {
> + p->curr.execWidth = 8;
> + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr /
> typeSize(GEN_TYPE_UW)), baseReg);
> + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
> + p->MOV(dst, indirect);
> +
> + p->curr.quarterControl = 1;
> + p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr /
> typeSize(GEN_TYPE_UW)), baseReg);
> + p->MOV(GenRegister::offset(dst, 1, 0), indirect);
> + } else
> + NOT_IMPLEMENTED;
> + p->pop();
> + }
> + }
> +
> void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
> const GenRegister dst = ra->genReg(insn.dst(0));
> const GenRegister src0 = ra->genReg(insn.src(0)); @@ -595,38 +631,6 @@
> namespace gbe
> p->MOV(xdst.bottom_half(), xsrc1.bottom_half());
> }
> break;
> - case SEL_OP_SIMD_SHUFFLE:
> - {
> - uint32_t simd = p->curr.execWidth;
> - if (src1.file == GEN_IMMEDIATE_VALUE) {
> - uint32_t offset = src1.value.ud % simd;
> - GenRegister reg = GenRegister::suboffset(src0, offset);
> - p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr /
> typeSize(reg.type)), reg.type));
> - } else {
> - uint32_t base = src0.nr * 32 + src0.subnr * 4;
> - GenRegister baseReg = GenRegister::immuw(base);
> - const GenRegister a0 = GenRegister::addr8(0);
> -
> - p->push();
> - if (simd == 8) {
> - p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr /
> typeSize(GEN_TYPE_UW)), baseReg);
> - GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
> - p->MOV(dst, indirect);
> - } else if (simd == 16) {
> - p->curr.execWidth = 8;
> - p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr /
> typeSize(GEN_TYPE_UW)), baseReg);
> - GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
> - p->MOV(dst, indirect);
> -
> - p->curr.quarterControl = 1;
> - p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr /
> typeSize(GEN_TYPE_UW)), baseReg);
> - p->MOV(GenRegister::offset(dst, 1, 0), indirect);
> - } else
> - NOT_IMPLEMENTED;
> - p->pop();
> - }
> - }
> - break;
> default: NOT_IMPLEMENTED;
> }
> }
> diff --git a/backend/src/backend/gen_context.hpp
> b/backend/src/backend/gen_context.hpp
> index d387387..69fe513 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -127,6 +127,7 @@ namespace gbe
> virtual void emitUnaryInstruction(const SelectionInstruction &insn);
> virtual void emitUnaryWithTempInstruction(const SelectionInstruction &insn);
> virtual void emitBinaryInstruction(const SelectionInstruction &insn);
> + virtual void emitSimdShuffleInstruction(const SelectionInstruction
> + &insn);
> virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
> void emitTernaryInstruction(const SelectionInstruction &insn);
> virtual void emitI64MULHIInstruction(const SelectionInstruction &insn); diff --
> git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index d054820..d073770 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -3,6 +3,7 @@ DECL_GEN7_SCHEDULE(Label, 0, 0, 0)
> DECL_GEN7_SCHEDULE(Unary, 20, 4, 2)
> DECL_GEN7_SCHEDULE(UnaryWithTemp, 20, 40, 20)
> DECL_GEN7_SCHEDULE(Binary, 20, 4, 2)
> +DECL_GEN7_SCHEDULE(SimdShuffle, 20, 4, 2)
> DECL_GEN7_SCHEDULE(BinaryWithTemp, 20, 40, 20)
> DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2)
> DECL_GEN7_SCHEDULE(I64Shift, 20, 40, 20)
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index af5ab9c..b0ba9e3 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -543,7 +543,6 @@ namespace gbe
> ALU1(RNDD)
> ALU1(RNDU)
> ALU2(MACH)
> - ALU2(SIMD_SHUFFLE)
> ALU1(LZD)
> ALU3(MAD)
> ALU2WithTemp(MUL_HI)
> @@ -565,6 +564,8 @@ namespace gbe
> #undef ALU2WithTemp
> #undef ALU3
> #undef I64Shift
> + /*! simd shuffle */
> + void SIMD_SHUFFLE(Reg dst, Reg src0, Reg src1);
> /*! Convert 64-bit integer to 32-bit float */
> void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[6]);
> /*! Convert 64-bit integer to 32-bit float */ @@ -1652,6 +1653,14 @@
> namespace gbe
> insn->src(2) = src2;
> }
>
> + void Selection::Opaque::SIMD_SHUFFLE(Reg dst, Reg src0, Reg src1) {
> + SelectionInstruction *insn = this->appendInsn(SEL_OP_SIMD_SHUFFLE, 1, 2);
> + insn->dst(0) = dst;
> + insn->src(0) = src0;
> + insn->src(1) = src1;
> + }
> +
> void Selection::Opaque::I64CMP(uint32_t conditional, Reg src0, Reg src1,
> GenRegister tmp[3]) {
> SelectionInstruction *insn = this->appendInsn(SEL_OP_I64CMP, 3, 2);
> insn->src(0) = src0;
> @@ -2815,17 +2824,6 @@ namespace gbe
> case OP_UPSAMPLE_LONG:
> sel.UPSAMPLE_LONG(dst, src0, src1);
> break;
> - case OP_SIMD_SHUFFLE:
> - {
> - if (src1.file == GEN_IMMEDIATE_VALUE)
> - sel.SIMD_SHUFFLE(dst, src0, src1);
> - else {
> - GenRegister shiftL = GenRegister::udxgrf(sel.curr.execWidth,
> sel.reg(FAMILY_DWORD));
> - sel.SHL(shiftL, src1, GenRegister::immud(0x2));
> - sel.SIMD_SHUFFLE(dst, src0, shiftL);
> - }
> - }
> - break;
> default: NOT_IMPLEMENTED;
> }
> sel.pop();
> @@ -4973,6 +4971,46 @@ namespace gbe
> }
> };
>
> + class SimdShuffleInstructionPattern : public SelectionPattern {
> + public:
> + SimdShuffleInstructionPattern(void) : SelectionPattern(1,1) {
> + this->opcodes.push_back(ir::OP_SIMD_SHUFFLE);
> + }
> + INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
> + using namespace ir;
> + const ir::SimdShuffleInstruction &insn =
> cast<SimdShuffleInstruction>(dag.insn);
> + assert(insn.getOpcode() == OP_SIMD_SHUFFLE);
> + const Type type = insn.getType();
> + GenRegister dst = sel.selReg(insn.getDst(0), type);
> + GenRegister src0 = sel.selReg(insn.getSrc(0), type);
> + GenRegister src1;
> +
> + SelectionDAG *dag0 = dag.child[0];
> + SelectionDAG *dag1 = dag.child[1];
> + if (dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI &&
> canGetRegisterFromImmediate(dag1->insn)) {
> + const auto &childInsn = cast<LoadImmInstruction>(dag1->insn);
> + src1 = getRegisterFromImmediate(childInsn.getImmediate(), TYPE_U32);
> + if (dag0) dag0->isRoot = 1;
> + } else {
> + markAllChildren(dag);
> + src1 = sel.selReg(insn.getSrc(1), TYPE_U32);
> + }
> +
> + sel.push();
> + if (src1.file == GEN_IMMEDIATE_VALUE)
> + sel.SIMD_SHUFFLE(dst, src0, src1);
> + else {
> + GenRegister shiftL = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> + sel.SHL(shiftL, src1, GenRegister::immud(0x2));
> + sel.SIMD_SHUFFLE(dst, src0, shiftL);
> + }
> + sel.pop();
> + return true;
> + }
> +
> + };
> +
> /*! Get a region of a register */
> class RegionInstructionPattern : public SelectionPattern
> {
> @@ -5247,6 +5285,7 @@ namespace gbe
> this->insert<GetImageInfoInstructionPattern>();
> this->insert<ReadARFInstructionPattern>();
> this->insert<RegionInstructionPattern>();
> + this->insert<SimdShuffleInstructionPattern>();
> this->insert<IndirectMovInstructionPattern>();
> this->insert<NullaryInstructionPattern>();
>
> diff --git a/backend/src/backend/gen_insn_selection.hxx
> b/backend/src/backend/gen_insn_selection.hxx
> index 79f2ce1..adbb137 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -26,7 +26,7 @@ DECL_SELECTION_IR(SHL, BinaryInstruction)
> DECL_SELECTION_IR(RSR, BinaryInstruction) DECL_SELECTION_IR(RSL,
> BinaryInstruction) DECL_SELECTION_IR(ASR, BinaryInstruction) -
> DECL_SELECTION_IR(SIMD_SHUFFLE, BinaryInstruction)
> +DECL_SELECTION_IR(SIMD_SHUFFLE, SimdShuffleInstruction)
> DECL_SELECTION_IR(I64SHR, I64ShiftInstruction) DECL_SELECTION_IR(I64SHL,
> I64ShiftInstruction) DECL_SELECTION_IR(I64ASR, I64ShiftInstruction) diff --git
> a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index
> 12d70a6..f93c528 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -741,6 +741,22 @@ namespace ir {
> Register src[0];
> };
>
> + class ALIGNED_INSTRUCTION SimdShuffleInstruction : public
> NaryInstruction<2>
> + {
> + public:
> + SimdShuffleInstruction(Type type,
> + Register dst,
> + Register src0,
> + Register src1) {
> + this->opcode = OP_SIMD_SHUFFLE;
> + this->type = type;
> + this->dst[0] = dst;
> + this->src[0] = src0;
> + this->src[1] = src1;
> + }
> + INLINE bool wellFormed(const Function &fn, std::string &why) const;
> + };
> +
> class ALIGNED_INSTRUCTION RegionInstruction :
> public BasePolicy,
> public NSrcPolicy<RegionInstruction, 1>, @@ -1154,6 +1170,19 @@
> namespace ir {
> return true;
> }
>
> + INLINE bool SimdShuffleInstruction::wellFormed(const Function &fn,
> std::string &whyNot) const
> + {
> + if (UNLIKELY( this->type != TYPE_U32 && this->type != TYPE_S32 && this-
> >type != TYPE_FLOAT)) {
> + whyNot = "Only support S32/U32/FLOAT type";
> + return false;
> + }
> +
> + if (UNLIKELY(checkRegisterData(FAMILY_DWORD, src[1], fn, whyNot) ==
> false))
> + return false;
> +
> + return true;
> + }
> +
> INLINE bool RegionInstruction::wellFormed(const Function &fn, std::string
> &whyNot) const
> {
> if (UNLIKELY(checkRegisterData(FAMILY_DWORD, src[0], fn, whyNot) ==
> false)) @@ -1461,6 +1490,10 @@ START_INTROSPECTION(RegionInstruction)
> #include "ir/instruction.hxx"
> END_INTROSPECTION(RegionInstruction)
>
> +START_INTROSPECTION(SimdShuffleInstruction)
> +#include "ir/instruction.hxx"
> +END_INTROSPECTION(SimdShuffleInstruction)
> +
> START_INTROSPECTION(IndirectMovInstruction)
> #include "ir/instruction.hxx"
> END_INTROSPECTION(IndirectMovInstruction)
> @@ -1652,6 +1685,7 @@ DECL_MEM_FN(BranchInstruction, LabelIndex,
> getLabelIndex(void), getLabelIndex()) DECL_MEM_FN(SyncInstruction, uint32_t,
> getParameters(void), getParameters()) DECL_MEM_FN(ReadARFInstruction,
> Type, getType(void), getType()) DECL_MEM_FN(ReadARFInstruction,
> ARFRegister, getARFRegister(void), getARFRegister())
> +DECL_MEM_FN(SimdShuffleInstruction, Type, getType(void), getType())
> DECL_MEM_FN(RegionInstruction, uint32_t, getOffset(void), getOffset())
> DECL_MEM_FN(IndirectMovInstruction, uint32_t, getOffset(void), getOffset())
> DECL_MEM_FN(IndirectMovInstruction, Type, getType(void), getType()) @@ -
> 1751,7 +1785,6 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t,
> getImageIndex(void), getImageIndex
> DECL_EMIT_FUNCTION(RHADD)
> DECL_EMIT_FUNCTION(I64HADD)
> DECL_EMIT_FUNCTION(I64RHADD)
> - DECL_EMIT_FUNCTION(SIMD_SHUFFLE)
>
> #undef DECL_EMIT_FUNCTION
>
> @@ -1881,6 +1914,9 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t,
> getImageIndex(void), getImageIndex
> Instruction REGION(Register dst, Register src, uint32_t offset) {
> return internal::RegionInstruction(dst, src, offset).convert();
> }
> + Instruction SIMD_SHUFFLE(Type type, Register dst, Register src0, Register
> src1) {
> + return internal::SimdShuffleInstruction(type, dst, src0,
> + src1).convert(); }
>
> Instruction INDIRECT_MOV(Type type, Register dst, Register src0, Register
> src1, uint32_t offset) {
> return internal::IndirectMovInstruction(type, dst, src0, src1, offset).convert();
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index ec4d00d..cf8d839 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -522,6 +522,14 @@ namespace ir {
> static bool isClassOf(const Instruction &insn);
> };
>
> + /*! simd shuffle */
> + class SimdShuffleInstruction : public Instruction {
> + public:
> + Type getType(void) const;
> + /*! Return true if the given instruction is an instance of this class */
> + static bool isClassOf(const Instruction &insn); };
> +
> /*! return a region of a register, make sure the offset does not exceed the
> register size */
> class RegionInstruction : public Instruction {
> public:
> diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index
> 1001837..81548c9 100644
> --- a/backend/src/ir/instruction.hxx
> +++ b/backend/src/ir/instruction.hxx
> @@ -59,7 +59,7 @@ DECL_INSN(BSB, BinaryInstruction) DECL_INSN(OR,
> BinaryInstruction) DECL_INSN(XOR, BinaryInstruction) DECL_INSN(AND,
> BinaryInstruction) -DECL_INSN(SIMD_SHUFFLE, BinaryInstruction)
> +DECL_INSN(SIMD_SHUFFLE, SimdShuffleInstruction)
> DECL_INSN(SEL, SelectInstruction)
> DECL_INSN(EQ, CompareInstruction)
> DECL_INSN(NE, CompareInstruction)
> --
> 1.9.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list