[Beignet] [PATCH 5/5] GBE: preparation to mix simd16 into simd8 kernel.
Zhigang Gong
zhigang.gong at intel.com
Wed May 21 18:41:52 PDT 2014
This patch modifies the scalarize pass to collect the IR layer
vector information and passes that information to the backend.
The backend will create two types of selection vector: one is
the general selection vector, which must be in a contiguous
register region, and the other is the IR layer vector, which is
better placed in a contiguous register region.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 54 +++++++++++++++---
backend/src/backend/gen_insn_selection.hpp | 44 ++++++++++++---
backend/src/backend/gen_reg_allocation.cpp | 89 +++++++++++++++++++++---------
backend/src/ir/function.hpp | 25 +++++++++
backend/src/ir/unit.cpp | 1 +
backend/src/ir/unit.hpp | 39 +++++++++++++
backend/src/llvm/llvm_gen_backend.cpp | 14 ++++-
backend/src/llvm/llvm_gen_backend.hpp | 3 +-
backend/src/llvm/llvm_scalarize.cpp | 13 ++++-
backend/src/llvm/llvm_to_gen.cpp | 2 +-
10 files changed, 233 insertions(+), 51 deletions(-)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 686e065..8925bbf 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -187,14 +187,6 @@ namespace gbe
}
///////////////////////////////////////////////////////////////////////////
- // SelectionVector
- ///////////////////////////////////////////////////////////////////////////
-
- SelectionVector::SelectionVector(void) :
- insn(NULL), reg(NULL), regNum(0), isSrc(0)
- {}
-
- ///////////////////////////////////////////////////////////////////////////
// SelectionBlock
///////////////////////////////////////////////////////////////////////////
@@ -1058,9 +1050,13 @@ namespace gbe
insn->extra.elem = bti;
SelectionVector *vector = this->appendVector();
+#if 0
vector->regNum = srcNum;
vector->reg = &insn->src(0);
vector->isSrc = 1;
+#else
+ vector->setVectorReg(&insn->src(0), srcNum, true);
+#endif
}
void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
@@ -1090,6 +1086,7 @@ namespace gbe
insn->extra.elem = valueNum;
// Only the temporary registers need contiguous allocation
+#if 0
dstVector->regNum = elemNum - valueNum;
dstVector->isSrc = 0;
dstVector->reg = &insn->dst(0);
@@ -1098,6 +1095,10 @@ namespace gbe
srcVector->regNum = 1;
srcVector->isSrc = 1;
srcVector->reg = &insn->src(0);
+#else
+ dstVector->setVectorReg(&insn->dst(0), elemNum - valueNum, false);
+ srcVector->setVectorReg(&insn->src(0), 1, true);
+#endif
}
void Selection::Opaque::UNTYPED_READ(Reg addr,
@@ -1116,7 +1117,7 @@ namespace gbe
insn->src(0) = addr;
insn->extra.function = bti;
insn->extra.elem = elemNum;
-
+#if 0
// Sends require contiguous allocation
dstVector->regNum = elemNum;
dstVector->isSrc = 0;
@@ -1125,6 +1126,10 @@ namespace gbe
srcVector->regNum = 1;
srcVector->isSrc = 1;
srcVector->reg = &insn->src(0);
+#else
+ dstVector->setVectorReg(&insn->dst(0), elemNum, false);
+ srcVector->setVectorReg(&insn->src(0), 1, true);
+#endif
}
/* elemNum contains all the temporary register and the
@@ -1149,9 +1154,13 @@ namespace gbe
insn->extra.elem = srcNum;
// Only the addr + temporary registers need to be contiguous.
+#if 0
vector->regNum = dstNum;
vector->reg = &insn->dst(0);
vector->isSrc = 1;
+#else
+ vector->setVectorReg(&insn->dst(0), dstNum, false);
+#endif
}
void Selection::Opaque::UNTYPED_WRITE(Reg addr,
@@ -1170,9 +1179,13 @@ namespace gbe
insn->extra.elem = elemNum;
// Sends require contiguous allocation for the sources
+#if 0
vector->regNum = elemNum+1;
vector->reg = &insn->src(0);
vector->isSrc = 1;
+#else
+ vector->setVectorReg(&insn->src(0), elemNum + 1, true); // source vector: anchor at src(0), as the disabled code above did
+#endif
}
void Selection::Opaque::BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, uint32_t bti) {
@@ -1190,12 +1203,17 @@ namespace gbe
// byte gather requires vector in the sense that scalar are not allowed
// (yet)
+#if 0
dstVector->regNum = 1;
dstVector->isSrc = 0;
dstVector->reg = &insn->dst(0);
srcVector->regNum = 1;
srcVector->isSrc = 1;
srcVector->reg = &insn->src(0);
+#else
+ dstVector->setVectorReg(&insn->dst(0), 1, false);
+ srcVector->setVectorReg(&insn->src(0), 1, true);
+#endif
}
void Selection::Opaque::BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, uint32_t bti) {
@@ -1209,9 +1227,13 @@ namespace gbe
insn->extra.elem = elemSize;
// value and address are contiguous in the send
+#if 0
vector->regNum = 2;
vector->isSrc = 1;
vector->reg = &insn->src(0);
+#else
+ vector->setVectorReg(&insn->src(0), 2, true);
+#endif
}
void Selection::Opaque::DWORD_GATHER(Reg dst, Reg addr, uint32_t bti) {
@@ -1224,12 +1246,17 @@ namespace gbe
insn->src(0) = addr;
insn->dst(0) = dst;
insn->extra.function = bti;
+#if 0
vector->regNum = 1;
vector->isSrc = 0;
vector->reg = &insn->dst(0);
srcVector->regNum = 1;
srcVector->isSrc = 1;
srcVector->reg = &insn->src(0);
+#else
+ vector->setVectorReg(&insn->dst(0), 1, false);
+ srcVector->setVectorReg(&insn->src(0), 1, true);
+#endif
}
void Selection::Opaque::UNPACK_BYTE(const GenRegister *dst, const GenRegister src, uint32_t elemNum) {
@@ -1600,6 +1627,7 @@ namespace gbe
for (uint32_t elemID = 0; elemID < msgNum; ++elemID)
insn->src(elemID) = msgPayloads[elemID];
+#if 0
// Sends require contiguous allocation
dstVector->regNum = dstNum;
dstVector->isSrc = 0;
@@ -1609,6 +1637,10 @@ namespace gbe
msgVector->regNum = msgNum;
msgVector->isSrc = 1;
msgVector->reg = &insn->src(0);
+#else
+ dstVector->setVectorReg(&insn->dst(0), dstNum, false);
+ msgVector->setVectorReg(&insn->src(0), msgNum, true);
+#endif
insn->extra.rdbti = bti;
insn->extra.sampler = sampler;
@@ -1638,10 +1670,14 @@ namespace gbe
insn->extra.bti = bti;
insn->extra.msglen = msgNum;
insn->extra.is3DWrite = is3D;
+#if 0
// Sends require contiguous allocation
msgVector->regNum = msgNum;
msgVector->isSrc = 1;
msgVector->reg = &insn->src(0);
+#else
+ msgVector->setVectorReg(&insn->src(0), msgNum, true);
+#endif
}
Selection::~Selection(void) { GBE_DELETE(this->opaque); }
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 1f48b23..eb8b2a4 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -149,23 +149,53 @@ namespace gbe
friend class Selection;
};
+ // Owns the selection block
+ class Selection;
+
/*! Instructions like sends require to make registers contiguous in GRF */
class SelectionVector : public NonCopyable, public intrusive_list_node
{
public:
- SelectionVector(void);
+ SelectionVector(void) : insn(NULL), reg(NULL), regNum(0),
+ issrc(false), isirvector(false) {}
+
/*! The instruction that requires the vector of registers */
SelectionInstruction *insn;
- /*! Directly points to the selection instruction registers */
- GenRegister *reg;
+ const ir::Register getReg(uint32_t regID) const {
+ return isirvector ? irReg[regID] : reg[regID].reg();
+ }
+ const uint32_t getNum(void) const { return regNum; }
+ bool isSrc(void) const { return issrc; }
+ bool isIRVector(void) const { return isirvector; }
+
+ INLINE void setVectorReg(const ir::Register *ir, uint32_t num) {
+ irReg = ir;
+ regNum = num;
+ isirvector = true;
+ }
+
+ private:
+ union {
+ /*! Directly points to the selection instruction registers, only exist when isIRVector == false */
+ const GenRegister *reg;
+ /*! is used to represent IR vector, only exist when isIRVector == true*/
+ const ir::Register *irReg;
+ };
+ INLINE void setVectorReg(GenRegister *r, uint32_t num, bool src) {
+ reg = r;
+ regNum = num;
+ isirvector = false;
+ issrc = src;
+ }
/*! Number of registers in the vector */
uint16_t regNum;
/*! Indicate if this a destination or a source vector */
- uint16_t isSrc;
- };
+ bool issrc;
+ /*! Indicate if this is a LLVM IR layer vector. */
+ bool isirvector;
+ friend class Selection;
- // Owns the selection block
- class Selection;
+ };
/*! A selection block is the counterpart of the IR Basic block. It contains
* the instructions generated from an IR basic block
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index f642c2e..83adaa5 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -154,10 +154,11 @@ namespace gbe
map<ir::Register, uint32_t> RA;
/*! Map offset to virtual registers. */
map<uint32_t, ir::Register> offsetReg;
- /*! Provides the position of each register in a vector */
+ /*! Provides the position of each register in a selection vector */
map<ir::Register, VectorLocation> vectorMap;
/*! All vectors used in the selection */
vector<SelectionVector*> vectors;
+ vector<vector<ir::Register>*> irVectors;
/*! The set of booleans that will go to GRF (cannot be kept into flags) */
set<ir::Register> grfBooleans;
/*! The set of booleans which be held in flags, don't need to allocate grf */
@@ -265,7 +266,7 @@ namespace gbe
}
bool GenRegAllocator::Opaque::isAllocated(const SelectionVector *vector) const {
- const ir::Register first = vector->reg[0].reg();
+ const ir::Register first = vector->getReg(0);
const auto it = vectorMap.find(first);
// If the first register is not allocated we are done
@@ -276,15 +277,15 @@ namespace gbe
// still registers to allocate
const SelectionVector *other = it->second.first;
const uint32_t otherFirst = it->second.second;
- const uint32_t leftNum = other->regNum - otherFirst;
- if (leftNum < vector->regNum)
+ const uint32_t leftNum = other->getNum() - otherFirst;
+ if (leftNum < vector->getNum())
return false;
// Now check that all the registers in the already allocated vector match
// the current vector
- for (uint32_t regID = 1; regID < vector->regNum; ++regID) {
- const ir::Register from = vector->reg[regID].reg();
- const ir::Register to = other->reg[regID + otherFirst].reg();
+ for (uint32_t regID = 1; regID < vector->getNum(); ++regID) {
+ const ir::Register from = vector->getReg(regID);
+ const ir::Register to = other->getReg(regID + otherFirst);
if (from != to)
return false;
}
@@ -292,8 +293,8 @@ namespace gbe
}
void GenRegAllocator::Opaque::coalesce(Selection &selection, SelectionVector *vector) {
- for (uint32_t regID = 0; regID < vector->regNum; ++regID) {
- const ir::Register reg = vector->reg[regID].reg();
+ for (uint32_t regID = 0; regID < vector->getNum(); ++regID) {
+ const ir::Register reg = vector->getReg(regID);
const auto it = this->vectorMap.find(reg);
// case 1: the register is not already in a vector, so it can stay in this
// vector. Note that local IDs are *non-scalar* special registers but will
@@ -315,21 +316,51 @@ namespace gbe
// and the order is maintained, we can reuse the previous vector and avoid
// the MOVs
else {
- ir::Register tmp;
- tmp = this->replaceReg(selection, vector->insn, regID, vector->isSrc);
- const VectorLocation location = std::make_pair(vector, regID);
- this->vectorMap.insert(std::make_pair(tmp, location));
+ if (!vector->isIRVector()) {
+ ir::Register tmp;
+ tmp = this->replaceReg(selection, vector->insn, regID, vector->isSrc());
+ const VectorLocation location = std::make_pair(vector, regID);
+ this->vectorMap.insert(std::make_pair(tmp, location));
+ } else {
+ // If this is a IR vector and already in another vector,
+ // we have to erase the previous allocated elements.
+ // As we can't do a replaceReg for IR vector type which is not only used in one
+ // instruction.
+ for (uint32_t id = 0; id < regID; ++id)
+ this->vectorMap.erase(vector->getReg(id));
+ break;
+ }
}
}
}
/*! Will sort vector in decreasing order */
inline bool cmp(const SelectionVector *v0, const SelectionVector *v1) {
- return v0->regNum > v1->regNum;
+ return v0->getNum() > v1->getNum();
}
void GenRegAllocator::Opaque::allocateVector(Selection &selection) {
- const uint32_t vectorNum = selection.getVectorNum();
+
+ // First we collect all the IR layer vector to a temporary array.
+ const ir::IRVectorMap *irVectorMap = ctx.getFunction().getIRVectorMap();
+ vector<SelectionVector *> SelIRVectors;
+ if (ctx.getSimdWidth() == 8) {
+ for (auto &it : *irVectorMap) {
+ uint32_t i = 0;
+ const ir::IRVector *iv = &it.second;
+ while(i < iv->regNum) {
+ // Collect a 2 elements vector is enough for the mix simd16 optimization.
+ if (iv->regNum - i >= 2) {
+ SelectionVector *sv = new SelectionVector();
+ sv->setVectorReg(&(iv->regs[i]), 2);
+ SelIRVectors.push_back(sv);
+ i += 2;
+ } else
+ break;
+ }
+ }
+ }
+ const uint32_t vectorNum = selection.getVectorNum() + SelIRVectors.size();
this->vectors.resize(vectorNum);
// First we find and store all vectors
@@ -337,6 +368,10 @@ namespace gbe
for (auto &block : *selection.blockList)
for (auto &v : block.vectorList)
this->vectors[vectorID++] = &v;
+ // add ir vectors into the array.
+ for (auto &v : SelIRVectors)
+ this->vectors[vectorID++] = v;
+
GBE_ASSERT(vectorID == vectorNum);
// Heuristic (really simple...): sort them by the number of registers they
@@ -670,25 +705,25 @@ namespace gbe
if (it != vectorMap.end()) {
const SelectionVector *vector = it->second.first;
// all the reg in the SelectionVector are spilled
- if(spilledRegs.find(vector->reg[0].reg())
+ if(spilledRegs.find(vector->getReg(0))
!= spilledRegs.end())
continue;
uint32_t alignment;
ir::RegisterFamily family;
getRegAttrib(reg, alignment, &family);
- const uint32_t size = vector->regNum * alignment;
+ const uint32_t size = vector->getNum() * alignment;
const uint32_t grfOffset = allocateReg(interval, size, alignment);
if(grfOffset == 0) {
GBE_ASSERT(!(reservedReg && family != ir::FAMILY_DWORD));
- for(int i = vector->regNum-1; i >= 0; i--) {
- if (!spillReg(vector->reg[i].reg()))
+ for(int i = vector->getNum()-1; i >= 0; i--) {
+ if (!spillReg(vector->getReg(i)))
return false;
}
continue;
}
- for (uint32_t regID = 0; regID < vector->regNum; ++regID) {
- const ir::Register reg = vector->reg[regID].reg();
+ for (uint32_t regID = 0; regID < vector->getNum(); ++regID) {
+ const ir::Register reg = vector->getReg(regID);
GBE_ASSERT(RA.contains(reg) == false
&& ctx.sel->getRegisterData(reg).family == family);
insertNewReg(reg, grfOffset + alignment * regID, true);
@@ -837,8 +872,8 @@ namespace gbe
// If a partial of a vector is expired, the vector will be unspillable, currently.
// FIXME we may need to fix those unspillable vector in the furture.
INLINE bool GenRegAllocator::Opaque::vectorCanSpill(SelectionVector *vector) {
- for(uint32_t id = 0; id < vector->regNum; id++)
- if (spillCandidate.find(intervals[(ir::Register)(vector->reg[id].value.reg)])
+ for(uint32_t id = 0; id < vector->getNum(); id++)
+ if (spillCandidate.find(intervals[(ir::Register)(vector->getReg(id))])
== spillCandidate.end())
return false;
return true;
@@ -872,11 +907,11 @@ namespace gbe
if (isVector
&& (vectorCanSpill(vectorIt->second.first))) {
const SelectionVector *vector = vectorIt->second.first;
- for (uint32_t id = 0; id < vector->regNum; id++) {
- GBE_ASSERT(spilledRegs.find(vector->reg[id].reg())
+ for (uint32_t id = 0; id < vector->getNum(); id++) {
+ GBE_ASSERT(spilledRegs.find(vector->getReg(id))
== spilledRegs.end());
- spillSet.insert(vector->reg[id].reg());
- reg = vector->reg[id].reg();
+ reg = vector->getReg(id);
+ spillSet.insert(reg);
family = ctx.sel->getRegisterFamily(reg);
size -= family == ir::FAMILY_QWORD ? 2 * GEN_REG_SIZE * ctx.getSimdWidth()/8
: GEN_REG_SIZE * ctx.getSimdWidth()/8;
diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
index 266e652..7fa395c 100644
--- a/backend/src/ir/function.hpp
+++ b/backend/src/ir/function.hpp
@@ -147,6 +147,14 @@ namespace ir {
GBE_STRUCT(Loop);
};
+ /*! Map of all IR vector. */
+ typedef struct IRVector {
+ IRVector() : regNum(0) { for(uint32_t i = 0; i < 16; i++) regs[i] = (Register) -1; }
+ uint32_t regNum;
+ Register regs[16];
+ } IRVector;
+ typedef map<const void *, IRVector> IRVectorMap;
+
/*! A function is :
* - a register file
* - a set of basic block layout into a CGF
@@ -340,6 +348,22 @@ namespace ir {
/*! add the loop info for later liveness analysis */
void addLoop(const vector<LabelIndex> &bbs, const vector<std::pair<LabelIndex, LabelIndex>> &exits);
INLINE const vector<Loop * > &getLoops() { return loops; }
+ /* Get reg vectors which indicate which registers are in a logical vector. */
+ INLINE const IRVectorMap *getIRVectorMap(void) const { return &irVectorMap; }
+ INLINE void insertIRVectorElement(const void *vectorValue, uint32_t id, Register reg) {
+ auto it = irVectorMap.find(vectorValue);
+ if (it != irVectorMap.end()) {
+ GBE_ASSERT(it->second.regs[id] == (Register) -1);
+ it->second.regs[id] = reg;
+ it->second.regNum++;
+ } else {
+ IRVector ir;
+ ir.regNum = 1;
+ ir.regs[id] = reg;
+ irVectorMap.insert(std::make_pair(vectorValue, ir));
+ }
+ }
+
private:
friend class Context; //!< Can freely modify a function
std::string name; //!< Function name
@@ -350,6 +374,7 @@ namespace ir {
vector<Immediate> immediates; //!< All immediate values in the function
vector<BasicBlock*> blocks; //!< All chained basic blocks
vector<Loop *> loops; //!< Loops info of the function
+ IRVectorMap irVectorMap; //!< IR vectors map
RegisterFile file; //!< RegisterDatas used by the instructions
Profile profile; //!< Current function profile
PushMap pushMap; //!< Pushed function arguments (reg->loc)
diff --git a/backend/src/ir/unit.cpp b/backend/src/ir/unit.cpp
index 4f9d740..0718a83 100644
--- a/backend/src/ir/unit.cpp
+++ b/backend/src/ir/unit.cpp
@@ -30,6 +30,7 @@ namespace ir {
Unit::Unit(PointerSize pointerSize) : pointerSize(pointerSize), valid(true) {}
Unit::~Unit(void) {
for (const auto &pair : functions) GBE_DELETE(pair.second);
+ for (const auto it : vectorMaps) delete it.second;
}
Function *Unit::getFunction(const std::string &name) const {
auto it = functions.find(name);
diff --git a/backend/src/ir/unit.hpp b/backend/src/ir/unit.hpp
index adebd3f..94db4be 100644
--- a/backend/src/ir/unit.hpp
+++ b/backend/src/ir/unit.hpp
@@ -42,6 +42,13 @@ namespace ir {
{
public:
typedef hash_map<std::string, Function*> FunctionSet;
+ typedef struct VectorIndex{
+ VectorIndex(const void *v, uint32_t id) : vectorValue(v), id(id) {}
+ const void *vectorValue;
+ uint32_t id;
+ } VectorIndex;
+ typedef map<const void *, VectorIndex> VectorMap; //!< a heuristic for mix simd16 optimization.
+
/*! Create an empty unit */
Unit(PointerSize pointerSize = POINTER_32_BITS);
/*! Release everything (*including* the function pointers) */
@@ -74,8 +81,40 @@ namespace ir {
const ConstantSet& getConstantSet(void) const { return constantSet; }
void setValid(bool value) { valid = value; }
bool getValid() { return valid; }
+ /*! set curr llvm function, for scalarize and gen pass. */
+ void setCurrLLVMFunction(void *f) { function = f; }
+ /*! insert a new vector element. */
+ void insertVectorElement(const void *vectorValue, const void *value, int id) {
+ GBE_ASSERT(function != NULL);
+ auto it = vectorMaps.find(function);
+ VectorMap *vectorMap;
+ if (it != vectorMaps.end())
+ vectorMap = it->second;
+ else {
+ vectorMap = new VectorMap();
+ vectorMaps.insert(std::make_pair(function, vectorMap));
+ }
+ VectorIndex vi(vectorValue, id);
+ vectorMap->insert(std::make_pair(value, vi));
+ }
+ /*! get a value's vector index information. */
+ const VectorIndex *getVectorIndex(void *valueKey) {
+ auto it = vectorMaps.find(function);
+ if (it == vectorMaps.end())
+ return NULL;
+ auto vectorMap = it->second;
+ auto vi = vectorMap->find(valueKey);
+ return vi != vectorMap->end() ? &vi->second : NULL;
+ }
+ void clearVectorMap(void) {
+ auto it = vectorMaps.find(function);
+ if (it != vectorMaps.end())
+ it->second->clear();
+ }
private:
friend class ContextInterface; //!< Can free modify the unit
+ const void * function; //!< current llvm function.
+ map<const void *, map<const void *, VectorIndex>*> vectorMaps;
hash_map<std::string, Function*> functions; //!< All the defined functions
ConstantSet constantSet; //!< All the constants defined in the unit
PointerSize pointerSize; //!< Size shared by all pointers
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 82429d0..fbd125a 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -483,6 +483,7 @@ namespace gbe
// definitions outside the translation unit.
if (F.hasAvailableExternallyLinkage())
return false;
+ unit.setCurrLLVMFunction(&F);
// As we inline all function calls, so skip non-kernel functions
bool bKernel = isKernelFunction(F);
@@ -875,14 +876,21 @@ namespace gbe
case Type::FloatTyID:
case Type::DoubleTyID:
case Type::PointerTyID:
- regTranslator.newScalar(value, key, 0, uniform);
+ {
+ auto reg = regTranslator.newScalar(value, key, 0, uniform);
+ auto vi = unit.getVectorIndex(key == NULL ? value : key);
+ if (vi != NULL)
+ this->ctx.getFunction().insertIRVectorElement(vi->vectorValue, vi->id, reg);
break;
+ }
case Type::VectorTyID:
{
auto vectorType = cast<VectorType>(type);
const uint32_t elemNum = vectorType->getNumElements();
- for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
- regTranslator.newScalar(value, key, elemID, uniform);
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ auto reg = regTranslator.newScalar(value, key, elemID, uniform);
+ this->ctx.getFunction().insertIRVectorElement(value, elemID, reg);
+ }
break;
}
default: NOT_SUPPORTED;
diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp
index 26323a3..80c2a5f 100644
--- a/backend/src/llvm/llvm_gen_backend.hpp
+++ b/backend/src/llvm/llvm_gen_backend.hpp
@@ -30,6 +30,7 @@
#include "sys/platform.hpp"
#include "sys/map.hpp"
#include "sys/hash_map.hpp"
+#include "ir/unit.hpp"
#include <algorithm>
// LLVM Type
@@ -88,7 +89,7 @@ namespace gbe
llvm::BasicBlockPass *createLoadStoreOptimizationPass();
/*! Scalarize all vector op instructions */
- llvm::FunctionPass* createScalarizePass();
+ llvm::FunctionPass* createScalarizePass(ir::Unit * unit = NULL);
/*! Remove/add NoDuplicate function attribute for barrier functions. */
llvm::ModulePass* createBarrierNodupPass(bool);
diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
index 73817e2..70dddff 100644
--- a/backend/src/llvm/llvm_scalarize.cpp
+++ b/backend/src/llvm/llvm_scalarize.cpp
@@ -93,6 +93,7 @@
#include "llvm/llvm_gen_backend.hpp"
#include "sys/map.hpp"
+#include "ir/unit.hpp"
using namespace llvm;
@@ -124,7 +125,7 @@ namespace gbe {
// Standard pass stuff
static char ID;
- Scalarize() : FunctionPass(ID)
+ Scalarize(ir::Unit *unit = NULL) : FunctionPass(ID), unit(unit)
{
initializeLoopInfoPass(*PassRegistry::getPassRegistry());
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5
@@ -231,6 +232,7 @@ namespace gbe {
builder->SetInsertPoint(++next);
}
+ ir::Unit *unit;
DenseMap<Value*, VectorValues> vectorVals;
Module* module;
IRBuilder<>* builder;
@@ -465,6 +467,7 @@ namespace gbe {
gatherComponents(i, args, callArgs);
Instruction* res = createScalarInstruction(inst, callArgs);
+ if (unit) unit->insertVectorElement(inst, res, i);
vVals.setComponent(i, res);
builder->Insert(res);
@@ -765,6 +768,10 @@ namespace gbe {
bool Scalarize::runOnFunction(Function& F)
{
+ if (unit) {
+ unit->setCurrLLVMFunction(&F);
+ unit->clearVectorMap();
+ }
switch (F.getCallingConv()) {
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
case CallingConv::PTX_Device:
@@ -856,9 +863,9 @@ namespace gbe {
{
return;
}
- FunctionPass* createScalarizePass()
+ FunctionPass* createScalarizePass(ir::Unit *unit)
{
- return new Scalarize();
+ return new Scalarize(unit);
}
char Scalarize::ID = 0;
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index 9282b3f..80f6bd6 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -204,7 +204,7 @@ namespace gbe
passes.add(createLowerSwitchPass());
passes.add(createPromoteMemoryToRegisterPass());
passes.add(createGVNPass()); // Remove redundancies
- passes.add(createScalarizePass()); // Expand all vector ops
+ passes.add(createScalarizePass(&unit)); // Expand all vector ops
passes.add(createDeadInstEliminationPass()); // Remove simplified instructions
passes.add(createCFGSimplificationPass()); // Merge & remove BBs
passes.add(createScalarizePass()); // Expand all vector ops
--
1.8.3.2
More information about the Beignet
mailing list