[Beignet] [PATCH 1/3] Add a scalarize llvm pass.
Yang Rong
rong.r.yang at intel.com
Wed May 15 21:36:33 PDT 2013
In this pass, expand all normal vector ops to scalar ops, except store/load, image read/write and function's argument. Add fake ExtractElement/InsertElement instructions to avoid dead instruction elimination, and unit valueMap hold the relationship between these fake instructions and real load/store instructions.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
backend/src/CMakeLists.txt | 1 +
backend/src/ir/unit.hpp | 22 +-
backend/src/llvm/llvm_gen_backend.cpp | 241 +++-------
backend/src/llvm/llvm_gen_backend.hpp | 30 +-
backend/src/llvm/llvm_scalarize.cpp | 801 +++++++++++++++++++++++++++++++++
backend/src/llvm/llvm_to_gen.cpp | 1 +
6 files changed, 914 insertions(+), 182 deletions(-)
create mode 100644 backend/src/llvm/llvm_scalarize.cpp
diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index 1829964..183517a 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -83,6 +83,7 @@ else (GBE_USE_BLOB)
backend/program.h
llvm/llvm_gen_backend.cpp
llvm/llvm_passes.cpp
+ llvm/llvm_scalarize.cpp
llvm/llvm_to_gen.cpp
llvm/llvm_gen_backend.hpp
llvm/llvm_gen_ocl_function.hxx
diff --git a/backend/src/ir/unit.hpp b/backend/src/ir/unit.hpp
index ae78638..3b293f5 100644
--- a/backend/src/ir/unit.hpp
+++ b/backend/src/ir/unit.hpp
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -24,9 +24,12 @@
#ifndef __GBE_IR_UNIT_HPP__
#define __GBE_IR_UNIT_HPP__
+#include "llvm/Value.h"
+
#include "ir/constant.hpp"
#include "ir/register.hpp"
#include "sys/hash_map.hpp"
+#include "sys/map.hpp"
namespace gbe {
namespace ir {
@@ -41,6 +44,7 @@ namespace ir {
{
public:
typedef hash_map<std::string, Function*> FunctionSet;
+ typedef std::pair<llvm::Value*, uint32_t> ValueIndex;
/*! Create an empty unit */
Unit(PointerSize pointerSize = POINTER_32_BITS);
/*! Release everything (*including* the function pointers) */
@@ -71,11 +75,27 @@ namespace ir {
ConstantSet& getConstantSet(void) { return constantSet; }
/*! Return the constant set */
const ConstantSet& getConstantSet(void) const { return constantSet; }
+
+ /*! Some values will not be allocated. For example a vector extract and
+ * a vector insertion when scalarize the vector load/store
+ */
+ void newValueProxy(llvm::Value *real,
+ llvm::Value *fake,
+ uint32_t realIndex = 0u,
+ uint32_t fakeIndex = 0u) {
+ const ValueIndex key(fake, fakeIndex);
+ const ValueIndex value(real, realIndex);
+ GBE_ASSERT(valueMap.find(key) == valueMap.end()); // Do not insert twice
+ valueMap[key] = value;
+ }
+ /*! Return the value map */
+ const map<ValueIndex, ValueIndex>& getValueMap(void) const { return valueMap; }
private:
friend class ContextInterface; //!< Can free modify the unit
hash_map<std::string, Function*> functions; //!< All the defined functions
ConstantSet constantSet; //!< All the constants defined in the unit
PointerSize pointerSize; //!< Size shared by all pointers
+ map<ValueIndex, ValueIndex> valueMap; //!< fake to real value map for vector load/store
GBE_CLASS(Unit);
};
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 8dcf15c..3855011 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -60,7 +60,7 @@
* dependencies on endianness or ABIs. Fortunately, the ptx (and nvptx for LLVM
* 3.2) profile is pretty well adapted to our needs since NV and Gen GPU are
* kind of similar, or at least they are similar enough to share the same front
- * end.
+ * end.
*
* Problems
* ========
@@ -126,10 +126,8 @@
#include "ir/context.hpp"
#include "ir/unit.hpp"
#include "ir/liveness.hpp"
-#include "sys/map.hpp"
#include "sys/set.hpp"
#include "sys/cvar.hpp"
-#include <algorithm>
/* Not defined for LLVM 3.0 */
#if !defined(LLVM_VERSION_MAJOR)
@@ -207,7 +205,7 @@ namespace gbe
/*! Type to register family translation */
static ir::RegisterFamily getFamily(const ir::Context &ctx, const Type *type)
{
- GBE_ASSERT(isScalarType(type) == true);
+ GBE_ASSERT(isScalarType(type) == true);
if (type == Type::getInt1Ty(type->getContext()))
return ir::FAMILY_BOOL;
if (type == Type::getInt8Ty(type->getContext()))
@@ -269,6 +267,8 @@ namespace gbe
class RegisterTranslator
{
public:
+ /*! Indices will be zero for scalar values */
+ typedef std::pair<Value*, uint32_t> ValueIndex;
RegisterTranslator(ir::Context &ctx) : ctx(ctx) {}
/*! Empty the maps */
@@ -289,6 +289,11 @@ namespace gbe
GBE_ASSERT(valueMap.find(key) == valueMap.end()); // Do not insert twice
valueMap[key] = value;
}
+ /*! After scalarize pass, there are some valueMap in unit,
+ * use this function to copy from unit valueMap */
+ void initValueMap(const map<ValueIndex, ValueIndex>& vMap) {
+ valueMap.insert(vMap.begin(), vMap.end());
+ }
/*! Mostly used for the preallocated registers (lids, gids) */
void newScalarProxy(ir::Register reg, Value *value, uint32_t index = 0u) {
const ValueIndex key(value, index);
@@ -325,10 +330,9 @@ namespace gbe
};
return ir::Register();
}
- /*! Get the register from the given value at given index possibly iterating
- * in the value map to get the final real register
- */
- ir::Register getScalar(Value *value, uint32_t index = 0u) {
+
+ /*! iterating in the value map to get the final real register */
+ void getRealValue(Value* &value, uint32_t& index) {
auto end = valueMap.end();
for (;;) {
auto it = valueMap.find(std::make_pair(value, index));
@@ -339,6 +343,14 @@ namespace gbe
index = it->second.second;
}
}
+ }
+
+ /*! Get the register from the given value at given index possibly iterating
+ * in the value map to get the final real register
+ */
+ ir::Register getScalar(Value *value, uint32_t index = 0u) {
+ getRealValue(value, index);
+
const auto key = std::make_pair(value, index);
GBE_ASSERT(scalarMap.find(key) != scalarMap.end());
return scalarMap[key];
@@ -351,16 +363,8 @@ namespace gbe
}
/*! Says if the value exists. Otherwise, it is undefined */
bool valueExists(Value *value, uint32_t index) {
- auto end = valueMap.end();
- for (;;) {
- auto it = valueMap.find(std::make_pair(value, index));
- if (it == end)
- break;
- else {
- value = it->second.first;
- index = it->second.second;
- }
- }
+ getRealValue(value, index);
+
const auto key = std::make_pair(value, index);
return scalarMap.find(key) != scalarMap.end();
}
@@ -375,8 +379,6 @@ namespace gbe
this->insertRegister(reg, key, index);
return reg;
}
- /*! Indices will be zero for scalar values */
- typedef std::pair<Value*, uint32_t> ValueIndex;
/*! Map value to ir::Register */
map<ValueIndex, ir::Register> scalarMap;
/*! Map values to values when this is only a translation (eq bitcast) */
@@ -384,28 +386,6 @@ namespace gbe
/*! Actually allocates the registers */
ir::Context &ctx;
};
- /*! All intrinsic Gen functions */
- enum OCLInstrinsic {
-#define DECL_LLVM_GEN_FUNCTION(ID, NAME) GEN_OCL_##ID,
-#include "llvm_gen_ocl_function.hxx"
-#undef DECL_LLVM_GEN_FUNCTION
- };
-
- /*! Build the hash map for OCL functions on Gen */
- struct OCLIntrinsicMap {
- /*! Build the intrinsic hash map */
- OCLIntrinsicMap(void) {
-#define DECL_LLVM_GEN_FUNCTION(ID, NAME) \
- map.insert(std::make_pair(#NAME, GEN_OCL_##ID));
-#include "llvm_gen_ocl_function.hxx"
-#undef DECL_LLVM_GEN_FUNCTION
- }
- /*! Sort intrinsics with their names */
- hash_map<std::string, OCLInstrinsic> map;
- };
-
- /*! Sort the OCL Gen instrinsic functions (built on pre-main) */
- static const OCLIntrinsicMap instrinsicMap;
/*! Translate LLVM IR code to Gen IR code */
class GenWriter : public FunctionPass, public InstVisitor<GenWriter>
@@ -423,7 +403,7 @@ namespace gbe
*/
set<const Value*> conditionSet;
/*! We visit each function twice. Once to allocate the registers and once to
- * emit the Gen IR instructions
+ * emit the Gen IR instructions
*/
enum Pass {
PASS_EMIT_REGISTERS = 0,
@@ -663,7 +643,7 @@ namespace gbe
if (dyn_cast<ConstantAggregateZero>(CPV)) {
return doIt(uint32_t(0)); // XXX Handle type
} else {
- if (dyn_cast<ConstantVector>(CPV))
+ if (dyn_cast<ConstantVector>(CPV))
CPV = extractConstantElem(CPV, index);
GBE_ASSERTM(dyn_cast<ConstantExpr>(CPV) == NULL, "Unsupported constant expression");
@@ -756,6 +736,9 @@ namespace gbe
}
ir::Register GenWriter::getRegister(Value *value, uint32_t elemID) {
+ //the real value may be constant, so get real value before constant check
+ regTranslator.getRealValue(value, elemID);
+
if (dyn_cast<ConstantExpr>(value)) {
ConstantExpr *ce = dyn_cast<ConstantExpr>(value);
if(ce->isCast()) {
@@ -867,6 +850,7 @@ namespace gbe
"Returned value for kernel functions is forbidden");
// Loop over the arguments and output registers for them
if (!F.arg_empty()) {
+ uint32_t argID = 0;
Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
// Insert a new register for each function argument
@@ -875,10 +859,33 @@ namespace gbe
uint32_t argID = 1; // Start at one actually
for (; I != E; ++I, ++argID) {
#else
- for (; I != E; ++I) {
+ for (; I != E; ++I, ++argID) {
#endif /* LLVM_VERSION_MINOR <= 1 */
const std::string &argName = I->getName().str();
Type *type = I->getType();
+
+ //add support for vector argument
+ if(type->isVectorTy()) {
+ VectorType *vectorType = cast<VectorType>(type);
+
+ this->newRegister(I);
+ ir::Register reg = getRegister(I, 0);
+
+ Type *elemType = vectorType->getElementType();
+ const uint32_t elemSize = getTypeByteSize(unit, elemType);
+ const uint32_t elemNum = vectorType->getNumElements();
+ //vector's elemType always scalar type
+ ctx.input(argName, ir::FunctionArgument::VALUE, reg, elemNum*elemSize);
+
+ ir::Function& fn = ctx.getFunction();
+ for(uint32_t i=1; i < elemNum; i++) {
+ ir::PushLocation argLocation(fn, argID, elemSize*i);
+ reg = getRegister(I, i);
+ ctx.appendPushedConstant(reg, argLocation); //add to push map for reg alloc
+ }
+ continue;
+ }
+
GBE_ASSERTM(isScalarType(type) == true,
"vector type in the function argument is not supported yet");
const ir::Register reg = regTranslator.newScalar(I);
@@ -916,7 +923,6 @@ namespace gbe
ctx.input(argName, ir::FunctionArgument::IMAGE, reg, ptrSize);
ctx.getFunction().getImageSet()->append(reg, &ctx);
break;
- break;
default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE);
}
}
@@ -1141,6 +1147,7 @@ namespace gbe
ctx.startFunction(F.getName());
this->regTranslator.clear();
+ this->regTranslator.initValueMap(unit.getValueMap());
this->labelMap.clear();
this->emitFunctionPrototype(F);
@@ -1495,141 +1502,15 @@ namespace gbe
ir::Context &ctx;
};
- void GenWriter::regAllocateInsertElement(InsertElementInst &I) {
- Value *modified = I.getOperand(0);
- Value *toInsert = I.getOperand(1);
- Value *index = I.getOperand(2);
-
- // Get the index for the insertion
- Constant *CPV = dyn_cast<Constant>(index);
- GBE_ASSERTM(CPV != NULL, "only constant indices when inserting values");
- auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
- GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32,
- "Invalid index type for InsertElement");
-
- // Crash on overrun
- VectorType *vectorType = cast<VectorType>(modified->getType());
- const uint32_t elemNum = vectorType->getNumElements();
- const uint32_t modifiedID = x.data.u32;
- GBE_ASSERTM(modifiedID < elemNum, "Out-of-bound index for InsertElement");
-
- // The source vector is not constant
- if (!isa<Constant>(modified) || isa<UndefValue>(modified)) {
- // Non modified values are just proxies
- for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
- if (elemID != modifiedID)
- regTranslator.newValueProxy(modified, &I, elemID, elemID);
- }
- // The source vector is constant
- else {
- // Non modified values will use LOADI
- for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
- if (elemID != modifiedID) {
- const ir::Type type = getType(ctx, toInsert->getType());
- const ir::Register reg = ctx.reg(getFamily(type));
- regTranslator.insertRegister(reg, &I, elemID);
- }
- }
-
- // If the element to insert is an immediate we will generate a LOADI.
- // Otherwise, the value is just a proxy of the inserted value
- if (dyn_cast<Constant>(toInsert) != NULL) {
- const ir::Type type = getType(ctx, toInsert->getType());
- const ir::Register reg = ctx.reg(getFamily(type));
- regTranslator.insertRegister(reg, &I, modifiedID);
- } else
- regTranslator.newValueProxy(toInsert, &I, 0, modifiedID);
- }
-
- void GenWriter::emitInsertElement(InsertElementInst &I) {
- // Note that we check everything in regAllocateInsertElement
- Value *modified = I.getOperand(0);
- Value *toInsert = I.getOperand(1);
- Value *index = I.getOperand(2);
-
- // Get the index of the value to insert
- Constant *indexCPV = dyn_cast<Constant>(index);
- auto x = processConstant<ir::Immediate>(indexCPV, InsertExtractFunctor(ctx));
- const uint32_t modifiedID = x.data.u32;
-
- // The source vector is constant. We need to insert LOADI for the unmodified
- // values
- if (isa<Constant>(modified) && !isa<UndefValue>(modified)) {
- VectorType *vectorType = cast<VectorType>(modified->getType());
- const uint32_t elemNum = vectorType->getNumElements();
- for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
- if (elemID != modifiedID) {
- Constant *sourceCPV = dyn_cast<Constant>(modified);
- if (isa<UndefValue>(extractConstantElem(sourceCPV, elemID)) == false) {
- const ir::ImmediateIndex immIndex = this->newImmediate(sourceCPV, elemID);
- const ir::Immediate imm = ctx.getImmediate(immIndex);
- const ir::Register reg = regTranslator.getScalar(&I, elemID);
- ctx.LOADI(imm.type, reg, immIndex);
- }
- }
- }
-
- // If the inserted value is not a constant, we just use a proxy
- if (dyn_cast<Constant>(toInsert) == NULL)
- return;
-
- // We need a LOADI if we insert an immediate
- Constant *toInsertCPV = dyn_cast<Constant>(toInsert);
- const ir::ImmediateIndex immIndex = this->newImmediate(toInsertCPV);
- const ir::Immediate imm = ctx.getImmediate(immIndex);
- const ir::Register reg = regTranslator.getScalar(&I, modifiedID);
- ctx.LOADI(imm.type, reg, immIndex);
- }
-
- void GenWriter::regAllocateExtractElement(ExtractElementInst &I) {
- Value *extracted = I.getOperand(0);
- Value *index = I.getOperand(1);
- GBE_ASSERTM(isa<Constant>(extracted) == false,
- "TODO support constant vector for extract");
- Constant *CPV = dyn_cast<Constant>(index);
- GBE_ASSERTM(CPV != NULL, "only constant indices when inserting values");
- auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
- GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32,
- "Invalid index type for InsertElement");
-
- // Crash on overrun
- const uint32_t extractedID = x.data.u32;
-#if GBE_DEBUG
- VectorType *vectorType = cast<VectorType>(extracted->getType());
- const uint32_t elemNum = vectorType->getNumElements();
- GBE_ASSERTM(extractedID < elemNum, "Out-of-bound index for InsertElement");
-#endif /* GBE_DEBUG */
-
- // Easy when the vector is not immediate
- regTranslator.newValueProxy(extracted, &I, extractedID, 0);
- }
-
- void GenWriter::emitExtractElement(ExtractElementInst &I) {
- // TODO -> insert LOADI when the extracted vector is constant
- }
+ /*! Because there are still fake insert/extract instruction for
+ * load/store, so keep empty function here */
+ void GenWriter::regAllocateInsertElement(InsertElementInst &I) {}
+ void GenWriter::emitInsertElement(InsertElementInst &I) {}
- void GenWriter::regAllocateShuffleVectorInst(ShuffleVectorInst &I) {
- Value *first = I.getOperand(0);
- Value *second = I.getOperand(1);
- GBE_ASSERTM(!isa<Constant>(first) || isa<UndefValue>(first),
- "TODO support constant vector for shuffle");
- GBE_ASSERTM(!isa<Constant>(second) || isa<UndefValue>(second),
- "TODO support constant vector for shuffle");
- VectorType *dstType = cast<VectorType>(I.getType());
- VectorType *srcType = cast<VectorType>(first->getType());
- const uint32_t dstElemNum = dstType->getNumElements();
- const uint32_t srcElemNum = srcType->getNumElements();
- for (uint32_t elemID = 0; elemID < dstElemNum; ++elemID) {
- uint32_t srcID = I.getMaskValue(elemID);
- Value *src = first;
- if (srcID >= srcElemNum) {
- srcID -= srcElemNum;
- src = second;
- }
- regTranslator.newValueProxy(src, &I, srcID, elemID);
- }
- }
+ void GenWriter::regAllocateExtractElement(ExtractElementInst &I) {}
+ void GenWriter::emitExtractElement(ExtractElementInst &I) {}
+ void GenWriter::regAllocateShuffleVectorInst(ShuffleVectorInst &I) {}
void GenWriter::emitShuffleVectorInst(ShuffleVectorInst &I) {}
void GenWriter::regAllocateSelectInst(SelectInst &I) {
diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp
index c270924..2ad879e 100644
--- a/backend/src/llvm/llvm_gen_backend.hpp
+++ b/backend/src/llvm/llvm_gen_backend.hpp
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -28,6 +28,9 @@
#include "llvm/Pass.h"
#include "sys/platform.hpp"
+#include "sys/map.hpp"
+#include "sys/hash_map.hpp"
+#include <algorithm>
// LLVM Type
namespace llvm { class Type; }
@@ -37,6 +40,29 @@ namespace gbe
// Final target of the Gen backend
namespace ir { class Unit; }
+ /*! All intrinsic Gen functions */
+ enum OCLInstrinsic {
+#define DECL_LLVM_GEN_FUNCTION(ID, NAME) GEN_OCL_##ID,
+#include "llvm_gen_ocl_function.hxx"
+#undef DECL_LLVM_GEN_FUNCTION
+ };
+
+ /*! Build the hash map for OCL functions on Gen */
+ struct OCLIntrinsicMap {
+ /*! Build the intrinsic hash map */
+ OCLIntrinsicMap(void) {
+#define DECL_LLVM_GEN_FUNCTION(ID, NAME) \
+ map.insert(std::make_pair(#NAME, GEN_OCL_##ID));
+#include "llvm_gen_ocl_function.hxx"
+#undef DECL_LLVM_GEN_FUNCTION
+ }
+ /*! Sort intrinsics with their names */
+ hash_map<std::string, OCLInstrinsic> map;
+ };
+
+ /*! Sort the OCL Gen instrinsic functions (built on pre-main) */
+ static const OCLIntrinsicMap instrinsicMap;
+
/*! Pad the offset */
uint32_t getPadding(uint32_t offset, uint32_t align);
@@ -55,6 +81,8 @@ namespace gbe
/*! Remove the GEP instructions */
llvm::BasicBlockPass *createRemoveGEPPass(const ir::Unit &unit);
+ llvm::FunctionPass* createScalarizePass(ir::Unit &unit);
+
} /* namespace gbe */
#endif /* __GBE_LLVM_GEN_BACKEND_HPP__ */
diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
new file mode 100644
index 0000000..453a23a
--- /dev/null
+++ b/backend/src/llvm/llvm_scalarize.cpp
@@ -0,0 +1,801 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ * Heldge RHodin <alice.rhodin at alice-dsl.net>
+ */
+
+/**
+ * \file llvm_passes.cpp
+ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+ * \author Heldge RHodin <alice.rhodin at alice-dsl.net>
+ */
+
+/* THIS CODE IS DERIVED FROM GPL LLVM PTX BACKEND. CODE IS HERE:
+ * http://sourceforge.net/scm/?type=git&group_id=319085
+ * Note that however, the original author, Heldge Rhodin, granted me (Benjamin
+ * Segovia) the right to use another license for it (MIT here)
+ */
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Function.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "llvm/llvm_gen_backend.hpp"
+#include "ir/unit.hpp"
+#include "sys/map.hpp"
+
+
+using namespace llvm;
+
+namespace gbe {
+
+ struct VectorValues {
+ VectorValues() : vals()
+ { }
+
+ void setComponent(int c, llvm::Value* val)
+ {
+ assert(c >= 0 && c < 16 && "Out of bounds component");
+ vals[c] = val;
+ }
+ llvm::Value* getComponent(int c)
+ {
+ assert(c >= 0 && c < 16 && "Out of bounds component");
+ assert(vals[c] && "Requesting non-existing component");
+ return vals[c];
+ }
+
+ // {Value* x, Value* y, Value* z, Value* w}
+ llvm::Value* vals[16];
+ };
+
+ class Scalarize : public FunctionPass {
+
+ public:
+ // Standard pass stuff
+ static char ID;
+
+ Scalarize(ir::Unit& unit) : FunctionPass(ID), unit(unit)
+ {
+ initializeLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeDominatorTreePass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function&);
+ void print(raw_ostream&, const Module* = 0) const;
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+
+ protected:
+ // An instruction is valid post-scalarization iff it is fully scalar or it
+ // is a gla_loadn
+ bool isValid(const Instruction*);
+
+ // Take an instruction that produces a vector, and scalarize it
+ bool scalarize(Instruction*);
+ bool scalarizePerComponent(Instruction*);
+ bool scalarizeFuncCall(CallInst *);
+ bool scalarizeLoad(LoadInst*);
+ bool scalarizeStore(StoreInst*);
+ //bool scalarizeIntrinsic(IntrinsicInst*);
+ bool scalarizeExtract(ExtractElementInst*);
+ bool scalarizeInsert(InsertElementInst*);
+ bool scalarizeShuffleVector(ShuffleVectorInst*);
+ bool scalarizePHI(PHINode*);
+ void scalarizeArgs(Function& F);
+ // ...
+
+ // Helpers to make the actual multiple scalar calls, one per
+ // component. Updates the given VectorValues's components with the new
+ // Values.
+ void makeScalarizedCalls(Function*, ArrayRef<Value*>, int numComponents, VectorValues&);
+
+ void makePerComponentScalarizedCalls(Instruction*, ArrayRef<Value*>);
+
+ // Makes a scalar form of the given instruction: replaces the operands
+ // and chooses a correct return type
+ Instruction* createScalarInstruction(Instruction* inst, ArrayRef<Value*>);
+
+ // Gather the specified components in the given values. Returns the
+ // component if the given value is a vector, or the scalar itself.
+ void gatherComponents(int component, ArrayRef<Value*> args, SmallVectorImpl<Value*>& componentArgs);
+
+ // Get the assigned component for that value. If the value is a scalar,
+ // returns the scalar. If it's a constant, returns that component. If
+ // it's an instruction, returns the vectorValues of that instruction for
+ // that component
+ Value* getComponent(int component, Value*);
+
+ // Used for assertion purposes. Whether we can get the component out with
+ // a getComponent call
+ bool canGetComponent(Value*);
+
+ // Used for assertion purposes. Whether for every operand we can get
+ // components with a getComponent call
+ bool canGetComponentArgs(User*);
+
+ // Delete the instruction in the deadList
+ void dce();
+
+
+ int GetConstantInt(const Value* value);
+ bool IsPerComponentOp(const Instruction* inst);
+ bool IsPerComponentOp(const Value* value);
+
+ //these function used to add extract and insert instructions when load/store etc.
+ void extractFromeVector(Value* insn);
+ Value* InsertToVector(Value* insn, Value* vecValue);
+
+ Type* GetBasicType(Value* value) {
+ return GetBasicType(value->getType());
+ }
+
+ Type* GetBasicType(Type* type) {
+ switch(type->getTypeID()) {
+ case Type::VectorTyID:
+ case Type::ArrayTyID:
+ return GetBasicType(type->getContainedType(0));
+ default:
+ break;
+ }
+ return type;
+ }
+
+ int GetComponentCount(const Type* type) {
+ if (type->getTypeID() == Type::VectorTyID)
+ return llvm::dyn_cast<VectorType>(type)->getNumElements();
+ else
+ return 1;
+ }
+
+ int GetComponentCount(const Value* value) {
+ return GetComponentCount(value->getType());
+ }
+
+ DenseMap<Value*, VectorValues> vectorVals;
+ Module* module;
+ IRBuilder<>* builder;
+
+ Type* intTy;
+ Type* floatTy;
+ ir::Unit &unit;
+
+ std::vector<Instruction*> deadList;
+
+ // List of vector phis that were not completely scalarized because some
+ // of their operands hadn't before been visited (i.e. loop variant
+ // variables)
+ SmallVector<PHINode*, 16> incompletePhis;
+ };
+
+ Value* Scalarize::getComponent(int component, Value* v)
+ {
+ assert(canGetComponent(v) && "getComponent called on unhandled vector");
+
+ if (v->getType()->isVectorTy()) {
+ if (ConstantDataVector* c = dyn_cast<ConstantDataVector>(v)) {
+ return c->getElementAsConstant(component);
+ } else if (ConstantVector* c = dyn_cast<ConstantVector>(v)) {
+ return c->getOperand(component);
+ } else if (isa<ConstantAggregateZero>(v)) {
+ return Constant::getNullValue(GetBasicType(v));
+ } else if (isa<UndefValue>(v)) {
+ return UndefValue::get(GetBasicType(v));
+ } else {
+ return vectorVals[v].getComponent(component);
+ }
+ } else {
+ return v;
+ }
+ }
+
+ bool IsPerComponentOp(const llvm::Value* value)
+ {
+ const llvm::Instruction* inst = llvm::dyn_cast<const llvm::Instruction>(value);
+ return inst && IsPerComponentOp(inst);
+ }
+
+ bool Scalarize::IsPerComponentOp(const Instruction* inst)
+ {
+ //if (const IntrinsicInst* intr = dyn_cast<const IntrinsicInst>(inst))
+ // return IsPerComponentOp(intr);
+
+ if (inst->isTerminator())
+ return false;
+
+ switch (inst->getOpcode()) {
+
+ // Cast ops are only per-component if they cast back to the same vector
+ // width
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ return GetComponentCount(inst->getOperand(0)) == GetComponentCount(inst);
+
+ // Vector ops
+ case Instruction::InsertElement:
+ case Instruction::ExtractElement:
+ case Instruction::ShuffleVector:
+
+ // Ways of accessing/loading/storing vectors
+ case Instruction::ExtractValue:
+ case Instruction::InsertValue:
+
+ // Memory ops
+ case Instruction::Alloca:
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::GetElementPtr:
+ // Phis are a little special. We consider them not to be per-component
+ // because the mechanism of choice is a single value (what path we took to
+ // get here), and doesn't choose per-component (as select would). The caller
+ // should know to handle phis specially
+ case Instruction::PHI:
+ // Call insts, conservatively are no per-component
+ case Instruction::Call:
+ // Misc
+ case Instruction::LandingPad: //--- 3.0
+ case Instruction::VAArg:
+ return false;
+ } // end of switch (inst->getOpcode())
+
+ return true;
+ }
+ int Scalarize::GetConstantInt(const Value* value)
+ {
+ const ConstantInt *constantInt = dyn_cast<ConstantInt>(value);
+
+ // this might still be a constant expression, rather than a numeric constant,
+ // e.g., expression with undef's in it, so it was not folded
+ if (! constantInt)
+ NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("non-simple constant");
+
+ return constantInt->getValue().getSExtValue();
+ }
+ bool Scalarize::canGetComponent(Value* v)
+ {
+ if (v->getType()->isVectorTy()) {
+ if (isa<ConstantDataVector>(v) || isa<ConstantVector>(v) || isa<ConstantAggregateZero>(v) || isa<UndefValue>(v)) {
+ return true;
+ } else {
+ assert((isa<Instruction>(v) || isa<Argument>(v)) && "Non-constant non-instuction?");
+ return vectorVals.count(v);
+ }
+ } else {
+ return true;
+ }
+ }
+
+ bool Scalarize::canGetComponentArgs(User* u)
+ {
+ if (PHINode* phi = dyn_cast<PHINode>(u)) {
+ for (unsigned int i = 0; i < phi->getNumIncomingValues(); ++i)
+ if (!canGetComponent(phi->getIncomingValue(i)))
+ return false;
+ } else {
+ for (User::op_iterator i = u->op_begin(), e = u->op_end(); i != e; ++i)
+ if (!canGetComponent(*i))
+ return false;
+ }
+ return true;
+ }
+
+ void Scalarize::gatherComponents(int component, ArrayRef<Value*> args, SmallVectorImpl<Value*>& componentArgs)
+ {
+ componentArgs.clear();
+ for (ArrayRef<Value*>::iterator i = args.begin(), e = args.end(); i != e; ++i)
+ componentArgs.push_back(getComponent(component, *i));
+ }
+
+ Instruction* Scalarize::createScalarInstruction(Instruction* inst, ArrayRef<Value*> args)
+ {
+ // TODO: Refine the below into one large switch
+
+ unsigned op = inst->getOpcode();
+ if (inst->isCast()) {
+ assert(args.size() == 1 && "incorrect number of arguments for cast op");
+ return CastInst::Create((Instruction::CastOps)op, args[0], GetBasicType(inst));
+ }
+
+ if (inst->isBinaryOp()) {
+ assert(args.size() == 2 && "incorrect number of arguments for binary op");
+ return BinaryOperator::Create((Instruction::BinaryOps)op, args[0], args[1]);
+ }
+
+ if (PHINode* phi = dyn_cast<PHINode>(inst)) {
+ PHINode* res = PHINode::Create(GetBasicType(inst), phi->getNumIncomingValues());
+ assert(args.size() % 2 == 0 && "Odd number of arguments for a PHI");
+
+ // Loop over pairs of operands: [Value*, BasicBlock*]
+ for (unsigned int i = 0; i < args.size(); i++) {
+ BasicBlock* bb = phi->getIncomingBlock(i); //dyn_cast<BasicBlock>(args[i+1]);
+ //assert(bb && "Non-basic block incoming block?");
+ res->addIncoming(args[i], bb);
+ }
+
+ return res;
+ }
+
+ if (CmpInst* cmpInst = dyn_cast<CmpInst>(inst)) {
+ assert(args.size() == 2 && "incorrect number of arguments for comparison");
+ return CmpInst::Create(cmpInst->getOpcode(), cmpInst->getPredicate(), args[0], args[1]);
+ }
+
+ if (isa<SelectInst>(inst)) {
+ assert(args.size() == 3 && "incorrect number of arguments for select");
+ return SelectInst::Create(args[0], args[1], args[2]);
+ }
+
+ if (IntrinsicInst* intr = dyn_cast<IntrinsicInst>(inst)) {
+ if (! IsPerComponentOp(inst))
+ NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Scalarize instruction on a non-per-component intrinsic");
+
+ // TODO: Assumption is that all per-component intrinsics have all their
+ // arguments be overloadable. Need to find some way to assert on this
+ // assumption. This is due to how getDeclaration operates; it only takes
+ // a list of types that fit overloadable slots.
+ SmallVector<Type*, 8> tys(1, GetBasicType(inst->getType()));
+ // Call instructions have the decl as a last argument, so skip it
+ for (ArrayRef<Value*>::iterator i = args.begin(), e = args.end() - 1; i != e; ++i) {
+ tys.push_back(GetBasicType((*i)->getType()));
+ }
+
+ Function* f = Intrinsic::getDeclaration(module, intr->getIntrinsicID(), tys);
+ return CallInst::Create(f, args);
+ }
+
+ NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Currently unsupported instruction: ", inst->getOpcode(),
+ // inst->getOpcodeName());
+ return 0;
+
+ }
+
+
+ void Scalarize::makeScalarizedCalls(Function* f, ArrayRef<Value*> args, int count, VectorValues& vVals)
+ {
+ assert(count > 0 && count <= 16 && "invalid number of vector components");
+ for (int i = 0; i < count; ++i) {
+ Value* res;
+ SmallVector<Value*, 8> callArgs(args.begin(), args.end());
+ callArgs.push_back(ConstantInt::get(intTy, i));
+
+ res = builder->CreateCall(f, callArgs);
+ vVals.setComponent(i, res);
+ }
+ }
+
+ void Scalarize::makePerComponentScalarizedCalls(Instruction* inst, ArrayRef<Value*> args)
+ {
+ int count = GetComponentCount(inst);
+ assert(count > 0 && count <= 16 && "invalid number of vector components");
+ assert((inst->getNumOperands() == args.size() || isa<PHINode>(inst))
+ && "not enough arguments passed for instruction");
+
+ VectorValues& vVals = vectorVals[inst];
+
+ for (int i = 0; i < count; ++i) {
+ // Set this component of each arg
+ SmallVector<Value*, 8> callArgs(args.size(), 0);
+ gatherComponents(i, args, callArgs);
+
+ Instruction* res = createScalarInstruction(inst, callArgs);
+
+ vVals.setComponent(i, res);
+ builder->Insert(res);
+ }
+ }
+
+ bool Scalarize::isValid(const Instruction* inst)
+ {
+ // The result
+ if (inst->getType()->isVectorTy())
+ return false;
+
+ // The arguments
+ for (Instruction::const_op_iterator i = inst->op_begin(), e = inst->op_end(); i != e; ++i) {
+ const Value* v = (*i);
+ assert(v);
+ if (v->getType()->isVectorTy())
+ return false;
+ }
+
+ return true;
+ }
+
+ bool Scalarize::scalarize(Instruction* inst)
+ {
+ if (isValid(inst))
+ return false;
+
+ assert(! vectorVals.count(inst) && "We've already scalarized this somehow?");
+ assert((canGetComponentArgs(inst) || isa<PHINode>(inst)) &&
+ "Scalarizing an op whose arguments haven't been scalarized ");
+ builder->SetInsertPoint(inst);
+
+ if (IsPerComponentOp(inst))
+ return scalarizePerComponent(inst);
+
+ if (LoadInst* ld = dyn_cast<LoadInst>(inst))
+ return scalarizeLoad(ld);
+
+ if (CallInst* call = dyn_cast<CallInst>(inst))
+ return scalarizeFuncCall(call);
+
+ if (ExtractElementInst* extr = dyn_cast<ExtractElementInst>(inst))
+ return scalarizeExtract(extr);
+
+ if (InsertElementInst* ins = dyn_cast<InsertElementInst>(inst))
+ return scalarizeInsert(ins);
+
+ if (ShuffleVectorInst* sv = dyn_cast<ShuffleVectorInst>(inst))
+ return scalarizeShuffleVector(sv);
+
+ if (PHINode* phi = dyn_cast<PHINode>(inst))
+ return scalarizePHI(phi);
+
+ if (isa<ExtractValueInst>(inst) || isa<InsertValueInst>(inst))
+ // TODO: need to come up with a struct/array model for scalarization
+ NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Scalarizing struct/array ops");
+
+ if (StoreInst* st = dyn_cast<StoreInst>(inst))
+ return scalarizeStore(st);
+
+ NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Currently unhandled instruction ", inst->getOpcode(), inst->getOpcodeName());
+ return false;
+ }
+
+ bool Scalarize::scalarizeShuffleVector(ShuffleVectorInst* sv)
+ {
+ // %res = shuffleVector <n x ty> %foo, <n x ty> bar, <n x i32> <...>
+ // ==> nothing (just make a new VectorValues with the new components)
+ VectorValues& vVals = vectorVals[sv];
+
+ int size = GetComponentCount(sv);
+ int srcSize = GetComponentCount(sv->getOperand(0)->getType());
+
+ for (int i = 0; i < size; ++i) {
+ int select = sv->getMaskValue(i);
+
+ if (select < 0) {
+ vVals.setComponent(i, UndefValue::get(GetBasicType(sv->getOperand(0))));
+ continue;
+ }
+
+ // Otherwise look up the corresponding component from the correct
+ // source.
+ Value* selectee;
+ if (select < srcSize) {
+ selectee = sv->getOperand(0);
+ } else {
+ // Choose from the second operand
+ select -= srcSize;
+ selectee = sv->getOperand(1);
+ }
+
+ vVals.setComponent(i, getComponent(select, selectee));
+ }
+
+ return true;
+ }
+
+ bool Scalarize::scalarizePerComponent(Instruction* inst)
+ {
+ // dst = op <n x ty> %foo, <n x ty> %bar
+ // ==> dstx = op ty %foox, ty %barx
+ // dsty = op ty %fooy, ty %bary
+ // ...
+
+ SmallVector<Value*, 16> args(inst->op_begin(), inst->op_end());
+
+ makePerComponentScalarizedCalls(inst, args);
+
+ return true;
+ }
+
+ bool Scalarize::scalarizePHI(PHINode* phi)
+ {
+ // dst = phi <n x ty> [ %foo, %bb1 ], [ %bar, %bb2], ...
+ // ==> dstx = phi ty [ %foox, %bb1 ], [ %barx, %bb2], ...
+ // dsty = phi ty [ %fooy, %bb1 ], [ %bary, %bb2], ...
+
+ // If the scalar values are all known up-front, then just make the full
+ // phinode now. If they are not yet known (phinode for a loop variant
+ // variable), then deferr the arguments until later
+
+ if (canGetComponentArgs(phi)) {
+ SmallVector<Value*, 8> args(phi->op_begin(), phi->op_end());
+ makePerComponentScalarizedCalls(phi, args);
+ } else {
+ makePerComponentScalarizedCalls(phi, ArrayRef<Value*>());
+ incompletePhis.push_back(phi);
+ }
+
+ return true;
+ }
+
+ void Scalarize::extractFromeVector(Value* insn) {
+ VectorValues& vVals = vectorVals[insn];
+
+ for (int i = 0; i < GetComponentCount(insn); ++i) {
+ Value *cv = ConstantInt::get(intTy, i);
+ Value *EI = builder->CreateExtractElement(insn, cv);
+ vVals.setComponent(i, EI);
+ //unit.fakeInsnMap[EI] = insn;
+ unit.newValueProxy(insn, EI, i, 0);
+ }
+ }
+
+ Value* Scalarize::InsertToVector(Value * insn, Value* vecValue) {
+ //VectorValues& vVals = vectorVals[writeValue];
+ //unit.vecValuesMap[call] = vectorVals[writeValue];
+
+ //add fake insert instructions to avoid removed
+ Value *II = NULL;
+ for (int i = 0; i < GetComponentCount(vecValue); ++i) {
+ Value *vec = II ? II : UndefValue::get(vecValue->getType());
+ Value *cv = ConstantInt::get(intTy, i);
+ II = builder->CreateInsertElement(vec, getComponent(i, vecValue), cv);
+ //unit.vecValuesMap[insn].setComponent(i, getComponent(i, writeValue));
+ //unit.newValueProxy(getComponent(i, vecValue), vecValue, 0, i);
+ //unit.fakeInsnMap[II] = insn;
+ }
+
+ for (int i = 0; i < GetComponentCount(vecValue); ++i) {
+ unit.newValueProxy(getComponent(i, vecValue), II, 0, i);
+ }
+ return II;
+ }
+
+ bool Scalarize::scalarizeFuncCall(CallInst* call) {
+ if (Function *F = call->getCalledFunction()) {
+ if (F->getIntrinsicID() != 0) { //Intrinsic functions
+ NOT_IMPLEMENTED;
+ } else {
+ Value *Callee = call->getCalledValue();
+ const std::string fnName = Callee->getName();
+ auto it = instrinsicMap.map.find(fnName);
+ GBE_ASSERT(it != instrinsicMap.map.end());
+
+ // Get the function arguments
+ CallSite CS(call);
+ CallSite::arg_iterator CI = CS.arg_begin() + 3;
+
+ switch (it->second) {
+ default: break;
+ case GEN_OCL_READ_IMAGE0:
+ case GEN_OCL_READ_IMAGE1:
+ case GEN_OCL_READ_IMAGE2:
+ case GEN_OCL_READ_IMAGE3:
+ case GEN_OCL_READ_IMAGE4:
+ case GEN_OCL_READ_IMAGE5:
+ case GEN_OCL_READ_IMAGE10:
+ case GEN_OCL_READ_IMAGE11:
+ case GEN_OCL_READ_IMAGE12:
+ case GEN_OCL_READ_IMAGE13:
+ case GEN_OCL_READ_IMAGE14:
+ case GEN_OCL_READ_IMAGE15:
+ {
+ extractFromeVector(call);
+ break;
+ }
+ case GEN_OCL_WRITE_IMAGE10:
+ case GEN_OCL_WRITE_IMAGE11:
+ case GEN_OCL_WRITE_IMAGE12:
+ case GEN_OCL_WRITE_IMAGE13:
+ case GEN_OCL_WRITE_IMAGE14:
+ case GEN_OCL_WRITE_IMAGE15:
+ CI++;
+ case GEN_OCL_WRITE_IMAGE0:
+ case GEN_OCL_WRITE_IMAGE1:
+ case GEN_OCL_WRITE_IMAGE2:
+ case GEN_OCL_WRITE_IMAGE3:
+ case GEN_OCL_WRITE_IMAGE4:
+ case GEN_OCL_WRITE_IMAGE5:
+ {
+ *CI = InsertToVector(call, *CI);
+ break;
+ }
+ }
+ }
+ }
+ return false;
+ }
+
+ bool Scalarize::scalarizeLoad(LoadInst* ld)
+ {
+ extractFromeVector(ld);
+ return false;
+ }
+
+ bool Scalarize::scalarizeStore(StoreInst* st) {
+ st->setOperand(0, InsertToVector(st, st->getValueOperand()));
+ return false;
+ }
+
+ bool Scalarize::scalarizeExtract(ExtractElementInst* extr)
+ {
+ // %res = extractelement <n X ty> %foo, %i
+ // ==> nothing (just use %foo's %ith component instead of %res)
+
+ if (! isa<Constant>(extr->getOperand(1))) {
+ // TODO: Variably referenced components. Probably handle/emulate through
+ // a series of selects.
+ NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Variably referenced vector components");
+ }
+ //if (isa<Argument>(extr->getOperand(0)))
+ // return false;
+ int component = GetConstantInt(extr->getOperand(1));
+ Value* v = getComponent(component, extr->getOperand(0));
+ if(extr == v)
+ return false;
+ extr->replaceAllUsesWith(v);
+
+ return true;
+ }
+
+ bool Scalarize::scalarizeInsert(InsertElementInst* ins)
+ {
+ // %res = insertValue <n x ty> %foo, %i
+ // ==> nothing (just make a new VectorValues with the new component)
+
+ if (! isa<Constant>(ins->getOperand(2))) {
+ // TODO: Variably referenced components. Probably handle/emulate through
+ // a series of selects.
+ NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Variably referenced vector components");
+ }
+
+ int component = GetConstantInt(ins->getOperand(2));
+
+ VectorValues& vVals = vectorVals[ins];
+ for (int i = 0; i < GetComponentCount(ins); ++i) {
+ vVals.setComponent(i, i == component ? ins->getOperand(1)
+ : getComponent(i, ins->getOperand(0)));
+ }
+
+ return true;
+ }
+
+ void Scalarize::scalarizeArgs(Function& F) {
+ if (F.arg_empty())
+ return;
+ ReversePostOrderTraversal<Function*> rpot(&F);
+ BasicBlock::iterator instI = (*rpot.begin())->begin();
+ builder->SetInsertPoint(instI);
+
+ Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+
+#if LLVM_VERSION_MINOR <= 1
+ const AttrListPtr &PAL = F.getAttributes();
+ uint32_t argID = 1; // Start at one actually
+ for (; I != E; ++I, ++argID) {
+#else
+ for (; I != E; ++I) {
+#endif /* LLVM_VERSION_MINOR <= 1 */
+ Type *type = I->getType();
+
+ if(type->isVectorTy())
+ extractFromeVector(I);
+ }
+ return;
+ }
+
+ bool Scalarize::runOnFunction(Function& F)
+ {
+ switch (F.getCallingConv()) {
+ case CallingConv::PTX_Device:
+ return false;
+ case CallingConv::PTX_Kernel:
+ break;
+ default: GBE_ASSERTM(false, "Unsupported calling convention");
+ }
+
+ bool changed = false;
+ module = F.getParent();
+ intTy = IntegerType::get(module->getContext(), 32);
+ floatTy = Type::getFloatTy(module->getContext());
+ builder = new IRBuilder<>(module->getContext());
+
+ scalarizeArgs(F);
+
+ typedef ReversePostOrderTraversal<Function*> RPOTType;
+ RPOTType rpot(&F);
+ for (RPOTType::rpo_iterator bbI = rpot.begin(), bbE = rpot.end(); bbI != bbE; ++bbI) {
+ for (BasicBlock::iterator instI = (*bbI)->begin(), instE = (*bbI)->end(); instI != instE; ++instI) {
+ bool scalarized = scalarize(instI);
+ if (scalarized) {
+ changed = true;
+ // TODO: uncomment when done
+ deadList.push_back(instI);
+ }
+ }
+ }
+
+ // Fill in the incomplete phis
+ for (SmallVectorImpl<PHINode*>::iterator phiI = incompletePhis.begin(), phiE = incompletePhis.end();
+ phiI != phiE; ++phiI) {
+ assert(canGetComponentArgs(*phiI) && "Phi's operands never scalarized");
+
+ // Fill in each component of this phi
+ VectorValues& vVals = vectorVals[*phiI];
+ for (int c = 0; c < GetComponentCount(*phiI); ++c) {
+ PHINode* compPhi = dyn_cast<PHINode>(vVals.getComponent(c));
+ assert(compPhi && "Vector phi got scalarized to non-phis?");
+
+ // Loop over pairs of operands: [Value*, BasicBlock*]
+ for (unsigned int i = 0; i < (*phiI)->getNumOperands(); i++) {
+ BasicBlock* bb = (*phiI)->getIncomingBlock(i);
+ assert(bb && "Non-basic block incoming block?");
+ compPhi->addIncoming(getComponent(c, (*phiI)->getOperand(i)), bb);
+ }
+ }
+ }
+
+ dce();
+
+ delete builder;
+ builder = 0;
+
+ return changed;
+ }
+
+ void Scalarize::dce()
+ {
+ //two passes delete for some phinode
+ for (std::vector<Instruction*>::reverse_iterator i = deadList.rbegin(), e = deadList.rend(); i != e; ++i) {
+ (*i)->dropAllReferences();
+ if((*i)->use_empty())
+ (*i)->eraseFromParent();
+ }
+ for (std::vector<Instruction*>::reverse_iterator i = deadList.rbegin(), e = deadList.rend(); i != e; ++i) {
+ if((*i)->getParent())
+ (*i)->eraseFromParent();
+ }
+ deadList.clear();
+ }
+
+ void Scalarize::getAnalysisUsage(AnalysisUsage& AU) const
+ {
+ }
+
+ void Scalarize::print(raw_ostream&, const Module*) const
+ {
+ return;
+ }
+ FunctionPass* createScalarizePass(ir::Unit &unit)
+ {
+ return new Scalarize(unit);
+ }
+ char Scalarize::ID = 0;
+
+} // end namespace
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index ea3d9eb..559cde0 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -69,6 +69,7 @@ namespace gbe
// Print the code before further optimizations
if (OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS)
passes.add(createPrintModulePass(&*o));
+ passes.add(createScalarizePass(unit)); // Expand all vector ops
passes.add(createScalarReplAggregatesPass()); // Break up allocas
passes.add(createRemoveGEPPass(unit));
passes.add(createConstantPropagationPass());
--
1.7.9.5
More information about the Beignet
mailing list