[Beignet] [PATCH 4/8] BDW: Refine BDW's int 32*32 multiply.
Yang Rong
rong.r.yang at intel.com
Sun Sep 28 22:38:33 PDT 2014
BDW supports int32 * int32 multiplication directly, so add a flag to the instruction selection for it.
BDW uses int32 * int16 when the accumulator is used. Because int32 * int16 also works on IVB,
change to int32 * int16 whenever the accumulator is used.
Need to refine int32 * int32 to long later.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
backend/src/backend/gen8_context.cpp | 2 +-
backend/src/backend/gen_context.cpp | 5 ++--
backend/src/backend/gen_insn_selection.cpp | 46 +++++++++++++++++++-----------
backend/src/backend/gen_insn_selection.hpp | 7 +++++
4 files changed, 40 insertions(+), 20 deletions(-)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index a9914f6..7247682 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -107,7 +107,7 @@ namespace gbe
}
void Gen8Context::newSelection(void) {
- this->sel = GBE_NEW(Selection75, *this);
+ this->sel = GBE_NEW(Selection8, *this);
}
}
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 81758eb..c2412d8 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -346,7 +346,8 @@ namespace gbe
p->push();
p->curr.predicate = GEN_PREDICATE_NONE;
p->curr.noMask = 1;
- p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, src1);
+ p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0,
+ GenRegister::h2(GenRegister::retype(src1, GEN_TYPE_UW)));
p->curr.accWrEnable = 1;
p->MACH(tmp, src0, src1);
p->pop();
@@ -1262,7 +1263,7 @@ namespace gbe
p->push();
p->curr.execWidth = 8;
for(int i = 0; i < execWidth; i += 8) {
- p->MUL(acc, src0, src1);
+ p->MUL(acc, src0, GenRegister::h2(GenRegister::retype(src1, GEN_TYPE_UW)));
p->curr.accWrEnable = 1;
p->MACH(high, src0, src1);
p->curr.accWrEnable = 0;
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index e3ee35d..dc10fa4 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -343,6 +343,8 @@ namespace gbe
/*! should add per thread offset to the local memory address when load/store/atomic */
bool needPatchSLMAddr() const { return patchSLMAddr; }
void setPatchSLMAddr(bool b) { patchSLMAddr = b; }
+ bool has32X32Mul() const { return bHas32X32Mul; }
+ void setHas32X32Mul(bool b) { bHas32X32Mul = b; }
/*! indicate whether a register is a scalar/uniform register. */
INLINE bool isScalarReg(const ir::Register &reg) const {
const ir::RegisterData &regData = getRegisterData(reg);
@@ -625,6 +627,7 @@ namespace gbe
/*! Auxiliary label for if/endif. */
uint16_t currAuxLabel;
bool patchSLMAddr;
+ bool bHas32X32Mul;
INLINE ir::LabelIndex newAuxLabel()
{
currAuxLabel++;
@@ -663,7 +666,8 @@ namespace gbe
ctx(ctx), block(NULL),
curr(ctx.getSimdWidth()), file(ctx.getFunction().getRegisterFile()),
maxInsnNum(ctx.getFunction().getLargestBlockSize()), dagPool(maxInsnNum),
- stateNum(0), vectorNum(0), bwdCodeGeneration(false), currAuxLabel(ctx.getFunction().labelNum()), patchSLMAddr(false)
+ stateNum(0), vectorNum(0), bwdCodeGeneration(false), currAuxLabel(ctx.getFunction().labelNum()),
+ patchSLMAddr(false), bHas32X32Mul(false)
{
const ir::Function &fn = ctx.getFunction();
this->regNum = fn.regNum();
@@ -1667,6 +1671,11 @@ namespace gbe
this->opaque->setPatchSLMAddr(true);
}
+ Selection8::Selection8(GenContext &ctx) : Selection(ctx) {
+ this->opaque->setPatchSLMAddr(true);
+ this->opaque->setHas32X32Mul(true);
+ }
+
void Selection::Opaque::TYPED_WRITE(GenRegister *msgs, uint32_t msgNum,
uint32_t bti, bool is3D) {
uint32_t elemID = 0;
@@ -2444,18 +2453,23 @@ namespace gbe
using namespace ir;
const ir::BinaryInstruction &insn = cast<ir::BinaryInstruction>(dag.insn);
const Type type = insn.getType();
- if (type == TYPE_U32 || type == TYPE_S32) {
+ if (type != TYPE_U32 && type != TYPE_S32)
+ return false;
+
+ GenRegister dst = sel.selReg(insn.getDst(0), type);
+ GenRegister src0 = sel.selReg(insn.getSrc(0), type);
+ GenRegister src1 = sel.selReg(insn.getSrc(1), type);
+ if (sel.has32X32Mul()) {
+ sel.MUL(dst, src0, src1);
+ } else {
sel.push();
- if (sel.isScalarReg(insn.getDst(0)) == true) {
- sel.curr.execWidth = 1;
- sel.curr.predicate = GEN_PREDICATE_NONE;
- sel.curr.noMask = 1;
- }
- const uint32_t simdWidth = sel.curr.execWidth;
+ if (sel.isScalarReg(insn.getDst(0)) == true) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
- GenRegister dst = sel.selReg(insn.getDst(0), type);
- GenRegister src0 = sel.selReg(insn.getSrc(0), type);
- GenRegister src1 = sel.selReg(insn.getSrc(1), type);
+ const int simdWidth = sel.curr.execWidth;
// Either left part of the 16-wide register or just a simd 8 register
dst = GenRegister::retype(dst, GEN_TYPE_D);
@@ -2498,13 +2512,11 @@ namespace gbe
} else
sel.MOV(GenRegister::retype(GenRegister::next(dst), GEN_TYPE_F), GenRegister::acc());
}
-
sel.pop();
- // All children are marked as root
- markAllChildren(dag);
- return true;
- } else
- return false;
+ }
+ // All children are marked as root
+ markAllChildren(dag);
+ return true;
}
};
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 9bcce6f..e39aa6e 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -284,6 +284,13 @@ namespace gbe
Selection75(GenContext &ctx);
};
+ class Selection8: public Selection
+ {
+ public:
+ /*! Initialize internal structures used for the selection */
+ Selection8(GenContext &ctx);
+ };
+
} /* namespace gbe */
#endif /* __GEN_INSN_SELECTION_HPP__ */
--
1.8.3.2
More information about the Beignet
mailing list