[Beignet] [PATCH 3/8] GBE: extend registers/tuples/immediates to 32bit wide.
Zhigang Gong
zhigang.gong at intel.com
Tue Mar 31 19:05:38 PDT 2015
For some extremely large kernels, these values may be larger than
0xFFFF, so we have to extend them to 32 bits.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 18 +++++++++---------
backend/src/backend/gen_reg_allocation.cpp | 12 ++++++------
backend/src/backend/gen_register.hpp | 4 ++--
backend/src/ir/immediate.hpp | 2 +-
backend/src/ir/instruction.cpp | 7 ++++---
backend/src/ir/instruction.hpp | 4 ++--
backend/src/ir/register.hpp | 12 ++++++------
7 files changed, 30 insertions(+), 29 deletions(-)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 0f5e496..5586468 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2001,7 +2001,7 @@ namespace gbe
if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL &&
dag->isUsed) {
sel.curr.physicalFlag = 0;
- sel.curr.flagIndex = (uint16_t)(insn.getDst(0));
+ sel.curr.flagIndex = (uint32_t)(insn.getDst(0));
sel.curr.modFlag = 1;
}
sel.MOV(dst, src);
@@ -2209,7 +2209,7 @@ namespace gbe
insn.getOpcode() == OP_OR ||
insn.getOpcode() == OP_XOR);
sel.curr.physicalFlag = 0;
- sel.curr.flagIndex = (uint16_t)(insn.getDst(0));
+ sel.curr.flagIndex = (uint32_t)(insn.getDst(0));
sel.curr.modFlag = 1;
}
@@ -2782,7 +2782,7 @@ namespace gbe
if (!sel.isScalarReg(insn.getDst(0)) && sel.regDAG[insn.getDst(0)]->isUsed) {
sel.curr.modFlag = 1;
sel.curr.physicalFlag = 0;
- sel.curr.flagIndex = (uint16_t) insn.getDst(0);
+ sel.curr.flagIndex = (uint32_t) insn.getDst(0);
}
sel.MOV(dst, imm.getIntegerValue() ? GenRegister::immuw(0xffff) : GenRegister::immuw(0));
break;
@@ -3042,7 +3042,7 @@ namespace gbe
sel.curr.physicalFlag = 0;
sel.curr.modFlag = 1;
sel.curr.predicate = GEN_PREDICATE_NONE;
- sel.curr.flagIndex = (uint16_t)alignedFlag;
+ sel.curr.flagIndex = (uint32_t)alignedFlag;
sel.CMP(GEN_CONDITIONAL_NEQ, GenRegister::unpacked_uw(shiftHReg), GenRegister::immuw(32));
sel.pop();
@@ -3055,7 +3055,7 @@ namespace gbe
// Only need to consider the tmpH when the addr is not aligned.
sel.curr.modFlag = 0;
sel.curr.physicalFlag = 0;
- sel.curr.flagIndex = (uint16_t)alignedFlag;
+ sel.curr.flagIndex = (uint32_t)alignedFlag;
sel.curr.predicate = GEN_PREDICATE_NORMAL;
sel.SHL(tmpH, tmp[i + 1], shiftH);
sel.OR(effectData[i], tmpL, tmpH);
@@ -3377,7 +3377,7 @@ namespace gbe
sel.curr.noMask = 1;
sel.curr.physicalFlag = 0;
sel.curr.modFlag = 1;
- sel.curr.flagIndex = (uint16_t)dst;
+ sel.curr.flagIndex = (uint32_t)dst;
sel.curr.grfFlag = needStoreBool; // indicate whether we need to allocate grf to store this boolean.
if (type == TYPE_S64 || type == TYPE_U64) {
GenRegister tmp[3];
@@ -3791,7 +3791,7 @@ namespace gbe
}
sel.curr.inversePredicate ^= inverse;
sel.curr.physicalFlag = 0;
- sel.curr.flagIndex = (uint16_t) pred;
+ sel.curr.flagIndex = (uint32_t) pred;
sel.curr.predicate = GEN_PREDICATE_NORMAL;
// FIXME in general, if the flag is a uniform flag.
// we should treat that flag as extern flag, as we
@@ -4204,7 +4204,7 @@ namespace gbe
// as if there is no backward jump latter, then obviously everything will work fine.
// If there is backward jump latter, then all the pcip will be updated correctly there.
sel.curr.physicalFlag = 0;
- sel.curr.flagIndex = (uint16_t) pred;
+ sel.curr.flagIndex = (uint32_t) pred;
sel.curr.predicate = GEN_PREDICATE_NORMAL;
sel.MOV(ip, GenRegister::immuw(uint16_t(dst)));
sel.curr.predicate = GEN_PREDICATE_NONE;
@@ -4261,7 +4261,7 @@ namespace gbe
GBE_ASSERT(jip == dst);
sel.push();
sel.curr.physicalFlag = 0;
- sel.curr.flagIndex = (uint16_t) pred;
+ sel.curr.flagIndex = (uint32_t) pred;
sel.curr.predicate = GEN_PREDICATE_NORMAL;
sel.MOV(ip, GenRegister::immuw(uint16_t(dst)));
sel.block->endifOffset = -1;
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 26078e0..a5d601a 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -54,14 +54,14 @@ namespace gbe
};
typedef struct GenRegIntervalKey {
- GenRegIntervalKey(uint16_t reg, int32_t maxID) {
- key = ((uint64_t)maxID << 16) | reg;
+ GenRegIntervalKey(uint32_t reg, int32_t maxID) {
+ key = ((uint64_t)maxID << 32) | reg;
}
const ir::Register getReg() const {
- return (ir::Register)(key & 0xFFFF);
+ return (ir::Register)(key & 0xFFFFFFFF);
}
int32_t getMaxID() const {
- return key >> 16;
+ return key >> 32;
}
uint64_t key;
} GenRegIntervalKey;
@@ -126,9 +126,9 @@ namespace gbe
/*! Allocate the virtual boolean (== flags) registers */
void allocateFlags(Selection &selection);
/*! validated flags which contains valid value in the physical flag register */
- set<uint16_t> validatedFlags;
+ set<uint32_t> validatedFlags;
/*! validated temp flag register which indicate the flag 0,1 contains which virtual flag register. */
- uint16_t validTempFlagReg;
+ uint32_t validTempFlagReg;
/*! validate flag for the current flag user instruction */
void validateFlag(Selection &selection, SelectionInstruction &insn);
/*! Allocate the GRF registers */
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index d539937..e166af4 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -132,7 +132,6 @@ namespace gbe
uint32_t physicalFlag:1; //!< Physical or virtual flag register
uint32_t flag:1; //!< Only if physical flag,
uint32_t subFlag:1; //!< Only if physical flag
- uint32_t flagIndex:16; //!< Only if virtual flag (index of the register)
uint32_t grfFlag:1; //!< Only if virtual flag, 0 means we do not need to allocate GRF.
uint32_t externFlag:1; //!< Only if virtual flag, 1 means this flag is from external BB.
uint32_t modFlag:1; //!< Only if virtual flag, 1 means will modify flag.
@@ -146,6 +145,7 @@ namespace gbe
uint32_t predicate:4;
uint32_t inversePredicate:1;
uint32_t saturate:1;
+ uint32_t flagIndex; //!< Only if virtual flag (index of the register)
void chooseNib(int nib) {
switch (nib) {
case 0:
@@ -240,7 +240,7 @@ namespace gbe
float f;
int32_t d;
uint32_t ud;
- uint16_t reg;
+ uint32_t reg;
int64_t i64;
} value;
diff --git a/backend/src/ir/immediate.hpp b/backend/src/ir/immediate.hpp
index 10bd035..6b27e8b 100644
--- a/backend/src/ir/immediate.hpp
+++ b/backend/src/ir/immediate.hpp
@@ -345,7 +345,7 @@ namespace ir {
}
/*! A value is stored in a per-function vector. This is the index to it */
- TYPE_SAFE(ImmediateIndex, uint16_t)
+ TYPE_SAFE(ImmediateIndex, uint32_t)
} /* namespace ir */
} /* namespace gbe */
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 039f085..8bd19b6 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -741,7 +741,7 @@ namespace ir {
const Function &fn,
std::string &whyNot)
{
- if (UNLIKELY(uint16_t(ID) >= fn.regNum())) {
+ if (UNLIKELY(uint32_t(ID) >= fn.regNum())) {
whyNot = "Out-of-bound destination register index";
return false;
}
@@ -885,8 +885,9 @@ namespace ir {
return false;
const RegisterFamily family = getFamily(this->type);
for (uint32_t srcID = 0; srcID < 2; ++srcID)
- if (UNLIKELY(checkRegisterData(family, src[srcID], fn, whyNot) == false))
+ if (UNLIKELY(checkRegisterData(family, src[srcID], fn, whyNot) == false)) {
return false;
+ }
return true;
}
@@ -1283,7 +1284,7 @@ namespace ir {
return HelperIntrospection<CLASS, RefClass>::value == 1;
#define START_INTROSPECTION(CLASS) \
- static_assert(sizeof(internal::CLASS) == (sizeof(uint64_t)*2), \
+ static_assert(sizeof(internal::CLASS) == (sizeof(uint64_t)*4), \
"Bad instruction size"); \
static_assert(offsetof(internal::CLASS, opcode) == 0, \
"Bad opcode offset"); \
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 47312f5..37f64af 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -103,7 +103,7 @@ namespace ir {
///////////////////////////////////////////////////////////////////////////
/*! Stores instruction internal data and opcode */
- class ALIGNED(sizeof(uint64_t)*2) InstructionBase
+ class ALIGNED(sizeof(uint64_t)*4) InstructionBase
{
public:
/*! Initialize the instruction from a 8 bytes stream */
@@ -117,7 +117,7 @@ namespace ir {
/*! Get the instruction opcode */
INLINE Opcode getOpcode(void) const { return opcode; }
protected:
- enum { opaqueSize = sizeof(uint64_t)*2-sizeof(uint8_t) };
+ enum { opaqueSize = sizeof(uint64_t)*4-sizeof(uint8_t) };
Opcode opcode; //!< Idendifies the instruction
char opaque[opaqueSize]; //!< Remainder of it
GBE_CLASS(InstructionBase); //!< Use internal allocators
diff --git a/backend/src/ir/register.hpp b/backend/src/ir/register.hpp
index ce8bd60..be5f60d 100644
--- a/backend/src/ir/register.hpp
+++ b/backend/src/ir/register.hpp
@@ -111,7 +111,7 @@ namespace ir {
/*! Register is the position of the index of the register data in the register
* file. We enforce type safety with this class
*/
- TYPE_SAFE(Register, uint16_t)
+ TYPE_SAFE(Register, uint32_t)
INLINE bool operator< (const Register &r0, const Register &r1) {
return r0.value() < r1.value();
}
@@ -119,7 +119,7 @@ namespace ir {
/*! Tuple is the position of the first register in the tuple vector. We
* enforce type safety with this class
*/
- TYPE_SAFE(Tuple, uint16_t)
+ TYPE_SAFE(Tuple, uint32_t)
/*! A register file allocates and destroys registers. Basically, we will have
* one register file per function
@@ -131,7 +131,7 @@ namespace ir {
INLINE Register append(RegisterFamily family, bool uniform = false) {
GBE_ASSERTM(regNum() < MAX_INDEX,
"Too many defined registers (only 65535 are supported)");
- const uint16_t index = regNum();
+ const uint32_t index = regNum();
const RegisterData reg(family, uniform);
regs.push_back(reg);
return Register(index);
@@ -157,18 +157,18 @@ namespace ir {
INLINE void setUniform(Register index, bool uniform) { regs[index].setUniform(uniform); }
/*! Get the register index from the tuple */
INLINE Register get(Tuple index, uint32_t which) const {
- return regTuples[uint16_t(index) + which];
+ return regTuples[uint32_t(index) + which];
}
/*! Set the register index from the tuple */
INLINE void set(Tuple index, uint32_t which, Register reg) {
- regTuples[uint16_t(index) + which] = reg;
+ regTuples[uint32_t(index) + which] = reg;
}
/*! Number of registers in the register file */
INLINE uint32_t regNum(void) const { return regs.size(); }
/*! Number of tuples in the register file */
INLINE uint32_t tupleNum(void) const { return regTuples.size(); }
/*! register and tuple indices are short */
- enum { MAX_INDEX = 0xffff };
+ enum { MAX_INDEX = 0xffffffff };
private:
vector<RegisterData> regs; //!< All the registers together
vector<Register> regTuples; //!< Tuples are used for many src / dst
--
1.9.1
More information about the Beignet
mailing list