[Beignet] [PATCH V2 3/6] Change constant buffer load to ia move.
Yang Rong
rong.r.yang at intel.com
Thu Apr 18 00:18:04 PDT 2013
Need to patch addrIMM after allocating the curbe.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
backend/src/backend/gen_context.cpp | 33 +++++++++++++++-
backend/src/backend/gen_context.hpp | 6 ++-
.../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
backend/src/backend/gen_insn_selection.cpp | 40 +++++++++++++++++---
backend/src/backend/gen_insn_selection.hxx | 1 +
backend/src/backend/gen_register.hpp | 10 +++++
6 files changed, 82 insertions(+), 9 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index b4c9a65..54c6d77 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -144,7 +144,7 @@ namespace gbe
}
}
- void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
+ void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
const GenRegister dst = ra->genReg(insn.dst(0));
const GenRegister src0 = ra->genReg(insn.src(0));
const GenRegister src1 = ra->genReg(insn.src(1));
@@ -219,6 +219,37 @@ namespace gbe
p->JMPI(src);
}
+ void GenContext::emitCBMoveInstruction(const SelectionInstruction &insn) {
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ const GenRegister src0 = ra->genReg(insn.src(0));
+ const GenRegister src1 = ra->genReg(insn.src(1));
+ const GenRegister src2 = ra->genReg(insn.src(2));
+ const GenRegister a0 = GenRegister::addr8(0);
+
+ p->ADD(src0, src1, GenRegister::negate(src2));
+
+ this->cbMovePos.push_back(std::make_pair(ir::Register(insn.src(2).value.reg), p->store.size()));
+ p->push();
+ p->curr.execWidth = 8;
+ p->MOV(a0, GenRegister::unpacked_uw(src0.nr, 0));
+ //p->ADD(a0, a0, GenRegister::immuw(0));
+ p->MOV(dst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8));
+ p->pop();
+
+ if (simdWidth == 16) {
+ const GenRegister dst1 = GenRegister::Qn(dst, 1);
+ const GenRegister src0_1 = GenRegister::Qn(GenRegister::unpacked_uw(src0.nr, 0), 1);
+ this->cbMovePos.push_back(std::make_pair(ir::Register(insn.src(2).value.reg), p->store.size()));
+ p->push();
+ p->curr.execWidth = 8;
+ p->curr.quarterControl = GEN_COMPRESSION_Q2;
+ p->MOV(a0, src0_1);
+ //p->ADD(a0, a0, GenRegister::immuw(0));
+ p->MOV(dst1, GenRegister::indirect(dst1.type, 0, GEN_WIDTH_8));
+ p->pop();
+ }
+ }
+
void GenContext::emitEotInstruction(const SelectionInstruction &insn) {
p->push();
p->curr.predicate = GEN_PREDICATE_NONE;
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 6af174f..8aabcd8 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -80,6 +80,7 @@ namespace gbe
void emitTernaryInstruction(const SelectionInstruction &insn);
void emitCompareInstruction(const SelectionInstruction &insn);
void emitJumpInstruction(const SelectionInstruction &insn);
+ void emitCBMoveInstruction(const SelectionInstruction &insn);
void emitEotInstruction(const SelectionInstruction &insn);
void emitNoOpInstruction(const SelectionInstruction &insn);
void emitWaitInstruction(const SelectionInstruction &insn);
@@ -91,13 +92,14 @@ namespace gbe
void emitByteScatterInstruction(const SelectionInstruction &insn);
void emitSampleInstruction(const SelectionInstruction &insn);
void emitTypedWriteInstruction(const SelectionInstruction &insn);
-
/*! Implements base class */
virtual Kernel *allocateKernel(void);
/*! Store the position of each label instruction in the Gen ISA stream */
map<ir::LabelIndex, uint32_t> labelPos;
/*! Store the Gen instructions to patch */
vector<std::pair<ir::LabelIndex, uint32_t>> branchPos2;
+ /*! Store the constant buffer ia mov instructions to patch */
+ vector<std::pair<ir::Register, uint32_t>> cbMovePos;
/*! Encode Gen ISA */
GenEncoder *p;
/*! Instruction selection on Gen ISA (pre-register allocation) */
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 969ec82..19ebaa6 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -4,6 +4,7 @@ DECL_GEN7_SCHEDULE(Unary, 20, 4, 2)
DECL_GEN7_SCHEDULE(Binary, 20, 4, 2)
DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2)
DECL_GEN7_SCHEDULE(Compare, 20, 4, 2)
+DECL_GEN7_SCHEDULE(CBMove, 40, 2, 2)
DECL_GEN7_SCHEDULE(Jump, 14, 1, 1)
DECL_GEN7_SCHEDULE(Eot, 20, 1, 1)
DECL_GEN7_SCHEDULE(NoOp, 20, 2, 2)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index e0e8920..67726be 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -25,7 +25,7 @@
/* This is the instruction selection code. First of all, this is a bunch of c++
* crap. Sorry if this is not that readable. Anyway, the goal here is to take
* GenIR code (i.e. the very regular, very RISC IR) and to produce GenISA with
- * virtual registers (i.e. regular GenIR registers).
+ * virtual registers (i.e. regular GenIR registers).
*
* Overall idea:
* =============
@@ -72,7 +72,7 @@
* *same* flag register for the predicates (used for masking) and the
* conditional modifier (used as a destination for CMP). This leads to extra
* complications with compare instructions and select instructions. Basically,
- * we need to insert extra MOVs.
+ * we need to insert extra MOVs.
*
* Also, there is some extra kludge to handle the predicates for JMPI.
*
@@ -435,6 +435,8 @@ namespace gbe
void LABEL(ir::LabelIndex label);
/*! Jump indexed instruction */
void JMPI(Reg src, ir::LabelIndex target);
+ /*! Constant buffer move instruction */
+ void CB_MOVE(Reg dst, Reg src0, Reg src1, Reg src2);
/*! Compare instructions */
void CMP(uint32_t conditional, Reg src0, Reg src1);
/*! Select instruction with embedded comparison */
@@ -481,7 +483,7 @@ namespace gbe
static void markAllChildren(SelectionDAG &dag) {
// Do not merge anything, so all sources become roots
for (uint32_t childID = 0; childID < dag.childNum; ++childID)
- if (dag.child[childID])
+ if (dag.child[childID])
dag.child[childID]->isRoot = 1;
}
@@ -684,6 +686,14 @@ namespace gbe
insn->index = uint16_t(index);
}
+ void Selection::Opaque::CB_MOVE(Reg dst, Reg src0, Reg src1, Reg src2) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_CB_MOVE, 1, 3);
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ insn->src(2) = src2;
+ insn->dst(0) = dst;
+ }
+
void Selection::Opaque::CMP(uint32_t conditional, Reg src0, Reg src1) {
SelectionInstruction *insn = this->appendInsn(SEL_OP_CMP, 0, 2);
insn->src(0) = src0;
@@ -1057,7 +1067,7 @@ namespace gbe
// Implementation of all patterns
///////////////////////////////////////////////////////////////////////////
- GenRegister getRegisterFromImmediate(ir::Immediate imm)
+ GenRegister getRegisterFromImmediate(ir::Immediate imm)
{
using namespace ir;
switch (imm.type) {
@@ -1614,6 +1624,21 @@ namespace gbe
/*! Load instruction pattern */
DECL_PATTERN(LoadInstruction)
{
+ void emitCBMove(Selection::Opaque &sel, const ir::LoadInstruction &insn, GenRegister addr) const
+ {
+ using namespace ir;
+ GBE_ASSERT(insn.getValueNum() == 1); //todo: handle vec later
+
+ GenRegister offset = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
+ const GenRegister baseReg = sel.selReg(sel.ctx.getConstBaseReg(Register(addr.value.reg)), TYPE_U32);
+ //const uint32_t valueNum = insn.getValueNum();
+ //GenRegister dst[valueNum];
+ //for (uint32_t dstID = 0; dstID < valueNum; ++dstID)
+ const GenRegister dst = sel.selReg(insn.getValue(0), insn.getValueType());
+
+ sel.CB_MOVE(dst, offset, GenRegister::retype(addr, GEN_TYPE_UD), baseReg);
+ }
+
void emitUntypedRead(Selection::Opaque &sel,
const ir::LoadInstruction &insn,
GenRegister addr,
@@ -1659,10 +1684,13 @@ namespace gbe
const GenRegister address = sel.selReg(insn.getAddress());
const AddressSpace space = insn.getAddressSpace();
GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
+ insn.getAddressSpace() == MEM_CONSTANT ||
insn.getAddressSpace() == MEM_PRIVATE ||
insn.getAddressSpace() == MEM_LOCAL);
GBE_ASSERT(sel.ctx.isScalarReg(insn.getValue(0)) == false);
- if (insn.isAligned() == true)
+ if(insn.getAddressSpace() == MEM_CONSTANT)
+ this->emitCBMove(sel, insn, address);
+ else if (insn.isAligned() == true)
this->emitUntypedRead(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00);
else {
const GenRegister value = sel.selReg(insn.getValue(0));
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 691100b..f89ad4c 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -24,6 +24,7 @@ DECL_SELECTION_IR(SEL_CMP, CompareInstruction)
DECL_SELECTION_IR(MAD, TernaryInstruction)
DECL_SELECTION_IR(JMPI, JumpInstruction)
DECL_SELECTION_IR(EOT, EotInstruction)
+DECL_SELECTION_IR(CB_MOVE, CBMoveInstruction)
DECL_SELECTION_IR(NOP, NoOpInstruction)
DECL_SELECTION_IR(WAIT, WaitInstruction)
DECL_SELECTION_IR(MATH, MathInstruction)
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 92122a6..d772b0d 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -725,6 +725,16 @@ namespace gbe
return ub16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
}
+ static INLINE GenRegister unpacked_uw(uint32_t nr, uint32_t subnr) {
+ return GenRegister(GEN_GENERAL_REGISTER_FILE,
+ nr,
+ subnr,
+ GEN_TYPE_UW,
+ GEN_VERTICAL_STRIDE_16,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_2);
+ }
+
static INLINE GenRegister mask(uint32_t subnr) {
return uw1(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_MASK, subnr);
}
--
1.7.9.5
More information about the Beignet
mailing list