[Beignet] [PATCH 1/3] Add a scalarize llvm pass.
Zhigang Gong
zhigang.gong at linux.intel.com
Thu May 16 22:53:48 PDT 2013
Some minor comments below. In general, this patchset is really nice — we now have cleaner code to handle vectors. Thanks.
On Thu, May 16, 2013 at 12:36:33PM +0800, Yang Rong wrote:
> In this pass, expand all normal vector ops to scalar ops, except store/load, image read/write and function's argument. Add fake ExtractElement/InsertElement instructions to avoid dead instruction elimination, and unit valueMap hold the relationship between these fake instructions and real load/store instructions.
>
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
> backend/src/CMakeLists.txt | 1 +
> backend/src/ir/unit.hpp | 22 +-
> backend/src/llvm/llvm_gen_backend.cpp | 241 +++-------
> backend/src/llvm/llvm_gen_backend.hpp | 30 +-
> backend/src/llvm/llvm_scalarize.cpp | 801 +++++++++++++++++++++++++++++++++
> backend/src/llvm/llvm_to_gen.cpp | 1 +
> 6 files changed, 914 insertions(+), 182 deletions(-)
> create mode 100644 backend/src/llvm/llvm_scalarize.cpp
>
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
> index 1829964..183517a 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -83,6 +83,7 @@ else (GBE_USE_BLOB)
> backend/program.h
> llvm/llvm_gen_backend.cpp
> llvm/llvm_passes.cpp
> + llvm/llvm_scalarize.cpp
> llvm/llvm_to_gen.cpp
> llvm/llvm_gen_backend.hpp
> llvm/llvm_gen_ocl_function.hxx
> diff --git a/backend/src/ir/unit.hpp b/backend/src/ir/unit.hpp
> index ae78638..3b293f5 100644
> --- a/backend/src/ir/unit.hpp
> +++ b/backend/src/ir/unit.hpp
> @@ -1,4 +1,4 @@
> -/*
> +/*
> * Copyright © 2012 Intel Corporation
> *
> * This library is free software; you can redistribute it and/or
> @@ -24,9 +24,12 @@
> #ifndef __GBE_IR_UNIT_HPP__
> #define __GBE_IR_UNIT_HPP__
>
> +#include "llvm/Value.h"
> +
> #include "ir/constant.hpp"
> #include "ir/register.hpp"
> #include "sys/hash_map.hpp"
> +#include "sys/map.hpp"
>
> namespace gbe {
> namespace ir {
> @@ -41,6 +44,7 @@ namespace ir {
> {
> public:
> typedef hash_map<std::string, Function*> FunctionSet;
> + typedef std::pair<llvm::Value*, uint32_t> ValueIndex;
> /*! Create an empty unit */
> Unit(PointerSize pointerSize = POINTER_32_BITS);
> /*! Release everything (*including* the function pointers) */
> @@ -71,11 +75,27 @@ namespace ir {
> ConstantSet& getConstantSet(void) { return constantSet; }
> /*! Return the constant set */
> const ConstantSet& getConstantSet(void) const { return constantSet; }
> +
> + /*! Some values will not be allocated. For example a vector extract and
> + * a vector insertion when scalarize the vector load/store
> + */
> + void newValueProxy(llvm::Value *real,
> + llvm::Value *fake,
> + uint32_t realIndex = 0u,
> + uint32_t fakeIndex = 0u) {
> + const ValueIndex key(fake, fakeIndex);
> + const ValueIndex value(real, realIndex);
> + GBE_ASSERT(valueMap.find(key) == valueMap.end()); // Do not insert twice
> + valueMap[key] = value;
> + }
> + /*! Return the value map */
> + const map<ValueIndex, ValueIndex>& getValueMap(void) const { return valueMap; }
> private:
> friend class ContextInterface; //!< Can free modify the unit
> hash_map<std::string, Function*> functions; //!< All the defined functions
> ConstantSet constantSet; //!< All the constants defined in the unit
> PointerSize pointerSize; //!< Size shared by all pointers
> + map<ValueIndex, ValueIndex> valueMap; //!< fake to real value map for vector load/store
> GBE_CLASS(Unit);
> };
>
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 8dcf15c..3855011 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -1,4 +1,4 @@
> -/*
> +/*
> * Copyright © 2012 Intel Corporation
> *
> * This library is free software; you can redistribute it and/or
> @@ -60,7 +60,7 @@
> * dependencies on endianness or ABIs. Fortunately, the ptx (and nvptx for LLVM
> * 3.2) profile is pretty well adapted to our needs since NV and Gen GPU are
> * kind of similar, or at least they are similar enough to share the same front
> - * end.
> + * end.
> *
> * Problems
> * ========
> @@ -126,10 +126,8 @@
> #include "ir/context.hpp"
> #include "ir/unit.hpp"
> #include "ir/liveness.hpp"
> -#include "sys/map.hpp"
> #include "sys/set.hpp"
> #include "sys/cvar.hpp"
> -#include <algorithm>
>
> /* Not defined for LLVM 3.0 */
> #if !defined(LLVM_VERSION_MAJOR)
> @@ -207,7 +205,7 @@ namespace gbe
> /*! Type to register family translation */
> static ir::RegisterFamily getFamily(const ir::Context &ctx, const Type *type)
> {
> - GBE_ASSERT(isScalarType(type) == true);
> + GBE_ASSERT(isScalarType(type) == true);
> if (type == Type::getInt1Ty(type->getContext()))
> return ir::FAMILY_BOOL;
> if (type == Type::getInt8Ty(type->getContext()))
> @@ -269,6 +267,8 @@ namespace gbe
> class RegisterTranslator
> {
> public:
> + /*! Indices will be zero for scalar values */
> + typedef std::pair<Value*, uint32_t> ValueIndex;
> RegisterTranslator(ir::Context &ctx) : ctx(ctx) {}
>
> /*! Empty the maps */
> @@ -289,6 +289,11 @@ namespace gbe
> GBE_ASSERT(valueMap.find(key) == valueMap.end()); // Do not insert twice
> valueMap[key] = value;
> }
> + /*! After scalarize pass, there are some valueMap in unit,
> + * use this function to copy from unit valueMap */
> + void initValueMap(const map<ValueIndex, ValueIndex>& vMap) {
> + valueMap.insert(vMap.begin(), vMap.end());
> + }
> /*! Mostly used for the preallocated registers (lids, gids) */
> void newScalarProxy(ir::Register reg, Value *value, uint32_t index = 0u) {
> const ValueIndex key(value, index);
> @@ -325,10 +330,9 @@ namespace gbe
> };
> return ir::Register();
> }
> - /*! Get the register from the given value at given index possibly iterating
> - * in the value map to get the final real register
> - */
> - ir::Register getScalar(Value *value, uint32_t index = 0u) {
> +
> + /*! iterating in the value map to get the final real register */
> + void getRealValue(Value* &value, uint32_t& index) {
> auto end = valueMap.end();
> for (;;) {
> auto it = valueMap.find(std::make_pair(value, index));
> @@ -339,6 +343,14 @@ namespace gbe
> index = it->second.second;
> }
> }
> + }
> +
> + /*! Get the register from the given value at given index possibly iterating
> + * in the value map to get the final real register
> + */
> + ir::Register getScalar(Value *value, uint32_t index = 0u) {
> + getRealValue(value, index);
> +
> const auto key = std::make_pair(value, index);
> GBE_ASSERT(scalarMap.find(key) != scalarMap.end());
> return scalarMap[key];
> @@ -351,16 +363,8 @@ namespace gbe
> }
> /*! Says if the value exists. Otherwise, it is undefined */
> bool valueExists(Value *value, uint32_t index) {
> - auto end = valueMap.end();
> - for (;;) {
> - auto it = valueMap.find(std::make_pair(value, index));
> - if (it == end)
> - break;
> - else {
> - value = it->second.first;
> - index = it->second.second;
> - }
> - }
> + getRealValue(value, index);
> +
> const auto key = std::make_pair(value, index);
> return scalarMap.find(key) != scalarMap.end();
> }
> @@ -375,8 +379,6 @@ namespace gbe
> this->insertRegister(reg, key, index);
> return reg;
> }
> - /*! Indices will be zero for scalar values */
> - typedef std::pair<Value*, uint32_t> ValueIndex;
> /*! Map value to ir::Register */
> map<ValueIndex, ir::Register> scalarMap;
> /*! Map values to values when this is only a translation (eq bitcast) */
> @@ -384,28 +386,6 @@ namespace gbe
> /*! Actually allocates the registers */
> ir::Context &ctx;
> };
> - /*! All intrinsic Gen functions */
> - enum OCLInstrinsic {
> -#define DECL_LLVM_GEN_FUNCTION(ID, NAME) GEN_OCL_##ID,
> -#include "llvm_gen_ocl_function.hxx"
> -#undef DECL_LLVM_GEN_FUNCTION
> - };
> -
> - /*! Build the hash map for OCL functions on Gen */
> - struct OCLIntrinsicMap {
> - /*! Build the intrinsic hash map */
> - OCLIntrinsicMap(void) {
> -#define DECL_LLVM_GEN_FUNCTION(ID, NAME) \
> - map.insert(std::make_pair(#NAME, GEN_OCL_##ID));
> -#include "llvm_gen_ocl_function.hxx"
> -#undef DECL_LLVM_GEN_FUNCTION
> - }
> - /*! Sort intrinsics with their names */
> - hash_map<std::string, OCLInstrinsic> map;
> - };
> -
> - /*! Sort the OCL Gen instrinsic functions (built on pre-main) */
> - static const OCLIntrinsicMap instrinsicMap;
>
> /*! Translate LLVM IR code to Gen IR code */
> class GenWriter : public FunctionPass, public InstVisitor<GenWriter>
> @@ -423,7 +403,7 @@ namespace gbe
> */
> set<const Value*> conditionSet;
> /*! We visit each function twice. Once to allocate the registers and once to
> - * emit the Gen IR instructions
> + * emit the Gen IR instructions
> */
> enum Pass {
> PASS_EMIT_REGISTERS = 0,
> @@ -663,7 +643,7 @@ namespace gbe
> if (dyn_cast<ConstantAggregateZero>(CPV)) {
> return doIt(uint32_t(0)); // XXX Handle type
> } else {
> - if (dyn_cast<ConstantVector>(CPV))
> + if (dyn_cast<ConstantVector>(CPV))
> CPV = extractConstantElem(CPV, index);
> GBE_ASSERTM(dyn_cast<ConstantExpr>(CPV) == NULL, "Unsupported constant expression");
>
> @@ -756,6 +736,9 @@ namespace gbe
> }
>
> ir::Register GenWriter::getRegister(Value *value, uint32_t elemID) {
> + //the real value may be constant, so get real value before constant check
> + regTranslator.getRealValue(value, elemID);
> +
> if (dyn_cast<ConstantExpr>(value)) {
> ConstantExpr *ce = dyn_cast<ConstantExpr>(value);
> if(ce->isCast()) {
> @@ -867,6 +850,7 @@ namespace gbe
> "Returned value for kernel functions is forbidden");
> // Loop over the arguments and output registers for them
> if (!F.arg_empty()) {
> + uint32_t argID = 0;
> Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
>
> // Insert a new register for each function argument
> @@ -875,10 +859,33 @@ namespace gbe
> uint32_t argID = 1; // Start at one actually
> for (; I != E; ++I, ++argID) {
> #else
> - for (; I != E; ++I) {
> + for (; I != E; ++I, ++argID) {
> #endif /* LLVM_VERSION_MINOR <= 1 */
> const std::string &argName = I->getName().str();
> Type *type = I->getType();
> +
> + //add support for vector argument
> + if(type->isVectorTy()) {
> + VectorType *vectorType = cast<VectorType>(type);
> +
> + this->newRegister(I);
> + ir::Register reg = getRegister(I, 0);
> +
> + Type *elemType = vectorType->getElementType();
> + const uint32_t elemSize = getTypeByteSize(unit, elemType);
> + const uint32_t elemNum = vectorType->getNumElements();
> + //vector's elemType always scalar type
> + ctx.input(argName, ir::FunctionArgument::VALUE, reg, elemNum*elemSize);
> +
> + ir::Function& fn = ctx.getFunction();
> + for(uint32_t i=1; i < elemNum; i++) {
> + ir::PushLocation argLocation(fn, argID, elemSize*i);
> + reg = getRegister(I, i);
> + ctx.appendPushedConstant(reg, argLocation); //add to push map for reg alloc
> + }
> + continue;
> + }
> +
> GBE_ASSERTM(isScalarType(type) == true,
> "vector type in the function argument is not supported yet");
> const ir::Register reg = regTranslator.newScalar(I);
> @@ -916,7 +923,6 @@ namespace gbe
> ctx.input(argName, ir::FunctionArgument::IMAGE, reg, ptrSize);
> ctx.getFunction().getImageSet()->append(reg, &ctx);
> break;
> - break;
> default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE);
> }
> }
> @@ -1141,6 +1147,7 @@ namespace gbe
>
> ctx.startFunction(F.getName());
> this->regTranslator.clear();
> + this->regTranslator.initValueMap(unit.getValueMap());
> this->labelMap.clear();
> this->emitFunctionPrototype(F);
>
> @@ -1495,141 +1502,15 @@ namespace gbe
> ir::Context &ctx;
> };
>
> - void GenWriter::regAllocateInsertElement(InsertElementInst &I) {
> - Value *modified = I.getOperand(0);
> - Value *toInsert = I.getOperand(1);
> - Value *index = I.getOperand(2);
> -
> - // Get the index for the insertion
> - Constant *CPV = dyn_cast<Constant>(index);
> - GBE_ASSERTM(CPV != NULL, "only constant indices when inserting values");
> - auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
> - GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32,
> - "Invalid index type for InsertElement");
> -
> - // Crash on overrun
> - VectorType *vectorType = cast<VectorType>(modified->getType());
> - const uint32_t elemNum = vectorType->getNumElements();
> - const uint32_t modifiedID = x.data.u32;
> - GBE_ASSERTM(modifiedID < elemNum, "Out-of-bound index for InsertElement");
> -
> - // The source vector is not constant
> - if (!isa<Constant>(modified) || isa<UndefValue>(modified)) {
> - // Non modified values are just proxies
> - for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
> - if (elemID != modifiedID)
> - regTranslator.newValueProxy(modified, &I, elemID, elemID);
> - }
> - // The source vector is constant
> - else {
> - // Non modified values will use LOADI
> - for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
> - if (elemID != modifiedID) {
> - const ir::Type type = getType(ctx, toInsert->getType());
> - const ir::Register reg = ctx.reg(getFamily(type));
> - regTranslator.insertRegister(reg, &I, elemID);
> - }
> - }
> -
> - // If the element to insert is an immediate we will generate a LOADI.
> - // Otherwise, the value is just a proxy of the inserted value
> - if (dyn_cast<Constant>(toInsert) != NULL) {
> - const ir::Type type = getType(ctx, toInsert->getType());
> - const ir::Register reg = ctx.reg(getFamily(type));
> - regTranslator.insertRegister(reg, &I, modifiedID);
> - } else
> - regTranslator.newValueProxy(toInsert, &I, 0, modifiedID);
> - }
> -
> - void GenWriter::emitInsertElement(InsertElementInst &I) {
> - // Note that we check everything in regAllocateInsertElement
> - Value *modified = I.getOperand(0);
> - Value *toInsert = I.getOperand(1);
> - Value *index = I.getOperand(2);
> -
> - // Get the index of the value to insert
> - Constant *indexCPV = dyn_cast<Constant>(index);
> - auto x = processConstant<ir::Immediate>(indexCPV, InsertExtractFunctor(ctx));
> - const uint32_t modifiedID = x.data.u32;
> -
> - // The source vector is constant. We need to insert LOADI for the unmodified
> - // values
> - if (isa<Constant>(modified) && !isa<UndefValue>(modified)) {
> - VectorType *vectorType = cast<VectorType>(modified->getType());
> - const uint32_t elemNum = vectorType->getNumElements();
> - for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
> - if (elemID != modifiedID) {
> - Constant *sourceCPV = dyn_cast<Constant>(modified);
> - if (isa<UndefValue>(extractConstantElem(sourceCPV, elemID)) == false) {
> - const ir::ImmediateIndex immIndex = this->newImmediate(sourceCPV, elemID);
> - const ir::Immediate imm = ctx.getImmediate(immIndex);
> - const ir::Register reg = regTranslator.getScalar(&I, elemID);
> - ctx.LOADI(imm.type, reg, immIndex);
> - }
> - }
> - }
> -
> - // If the inserted value is not a constant, we just use a proxy
> - if (dyn_cast<Constant>(toInsert) == NULL)
> - return;
> -
> - // We need a LOADI if we insert an immediate
> - Constant *toInsertCPV = dyn_cast<Constant>(toInsert);
> - const ir::ImmediateIndex immIndex = this->newImmediate(toInsertCPV);
> - const ir::Immediate imm = ctx.getImmediate(immIndex);
> - const ir::Register reg = regTranslator.getScalar(&I, modifiedID);
> - ctx.LOADI(imm.type, reg, immIndex);
> - }
> -
> - void GenWriter::regAllocateExtractElement(ExtractElementInst &I) {
> - Value *extracted = I.getOperand(0);
> - Value *index = I.getOperand(1);
> - GBE_ASSERTM(isa<Constant>(extracted) == false,
> - "TODO support constant vector for extract");
> - Constant *CPV = dyn_cast<Constant>(index);
> - GBE_ASSERTM(CPV != NULL, "only constant indices when inserting values");
> - auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
> - GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32,
> - "Invalid index type for InsertElement");
> -
> - // Crash on overrun
> - const uint32_t extractedID = x.data.u32;
> -#if GBE_DEBUG
> - VectorType *vectorType = cast<VectorType>(extracted->getType());
> - const uint32_t elemNum = vectorType->getNumElements();
> - GBE_ASSERTM(extractedID < elemNum, "Out-of-bound index for InsertElement");
> -#endif /* GBE_DEBUG */
> -
> - // Easy when the vector is not immediate
> - regTranslator.newValueProxy(extracted, &I, extractedID, 0);
> - }
> -
> - void GenWriter::emitExtractElement(ExtractElementInst &I) {
> - // TODO -> insert LOADI when the extracted vector is constant
> - }
> + /*! Because there are still fake insert/extract instruction for
> + * load/store, so keep empty function here */
> + void GenWriter::regAllocateInsertElement(InsertElementInst &I) {}
> + void GenWriter::emitInsertElement(InsertElementInst &I) {}
>
> - void GenWriter::regAllocateShuffleVectorInst(ShuffleVectorInst &I) {
> - Value *first = I.getOperand(0);
> - Value *second = I.getOperand(1);
> - GBE_ASSERTM(!isa<Constant>(first) || isa<UndefValue>(first),
> - "TODO support constant vector for shuffle");
> - GBE_ASSERTM(!isa<Constant>(second) || isa<UndefValue>(second),
> - "TODO support constant vector for shuffle");
> - VectorType *dstType = cast<VectorType>(I.getType());
> - VectorType *srcType = cast<VectorType>(first->getType());
> - const uint32_t dstElemNum = dstType->getNumElements();
> - const uint32_t srcElemNum = srcType->getNumElements();
> - for (uint32_t elemID = 0; elemID < dstElemNum; ++elemID) {
> - uint32_t srcID = I.getMaskValue(elemID);
> - Value *src = first;
> - if (srcID >= srcElemNum) {
> - srcID -= srcElemNum;
> - src = second;
> - }
> - regTranslator.newValueProxy(src, &I, srcID, elemID);
> - }
> - }
> + void GenWriter::regAllocateExtractElement(ExtractElementInst &I) {}
> + void GenWriter::emitExtractElement(ExtractElementInst &I) {}
>
> + void GenWriter::regAllocateShuffleVectorInst(ShuffleVectorInst &I) {}
> void GenWriter::emitShuffleVectorInst(ShuffleVectorInst &I) {}
>
> void GenWriter::regAllocateSelectInst(SelectInst &I) {
> diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp
> index c270924..2ad879e 100644
> --- a/backend/src/llvm/llvm_gen_backend.hpp
> +++ b/backend/src/llvm/llvm_gen_backend.hpp
> @@ -1,4 +1,4 @@
> -/*
> +/*
> * Copyright © 2012 Intel Corporation
> *
> * This library is free software; you can redistribute it and/or
> @@ -28,6 +28,9 @@
>
> #include "llvm/Pass.h"
> #include "sys/platform.hpp"
> +#include "sys/map.hpp"
> +#include "sys/hash_map.hpp"
> +#include <algorithm>
>
> // LLVM Type
> namespace llvm { class Type; }
> @@ -37,6 +40,29 @@ namespace gbe
> // Final target of the Gen backend
> namespace ir { class Unit; }
>
> + /*! All intrinsic Gen functions */
> + enum OCLInstrinsic {
> +#define DECL_LLVM_GEN_FUNCTION(ID, NAME) GEN_OCL_##ID,
> +#include "llvm_gen_ocl_function.hxx"
> +#undef DECL_LLVM_GEN_FUNCTION
> + };
> +
> + /*! Build the hash map for OCL functions on Gen */
> + struct OCLIntrinsicMap {
> + /*! Build the intrinsic hash map */
> + OCLIntrinsicMap(void) {
> +#define DECL_LLVM_GEN_FUNCTION(ID, NAME) \
> + map.insert(std::make_pair(#NAME, GEN_OCL_##ID));
> +#include "llvm_gen_ocl_function.hxx"
> +#undef DECL_LLVM_GEN_FUNCTION
> + }
> + /*! Sort intrinsics with their names */
> + hash_map<std::string, OCLInstrinsic> map;
> + };
> +
> + /*! Sort the OCL Gen instrinsic functions (built on pre-main) */
> + static const OCLIntrinsicMap instrinsicMap;
> +
> /*! Pad the offset */
> uint32_t getPadding(uint32_t offset, uint32_t align);
>
> @@ -55,6 +81,8 @@ namespace gbe
> /*! Remove the GEP instructions */
> llvm::BasicBlockPass *createRemoveGEPPass(const ir::Unit &unit);
>
> + llvm::FunctionPass* createScalarizePass(ir::Unit &unit);
> +
> } /* namespace gbe */
>
> #endif /* __GBE_LLVM_GEN_BACKEND_HPP__ */
> diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
> new file mode 100644
> index 0000000..453a23a
> --- /dev/null
> +++ b/backend/src/llvm/llvm_scalarize.cpp
> @@ -0,0 +1,801 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> + *
> + * Author: Benjamin Segovia <benjamin.segovia at intel.com>
> + * Heldge RHodin <alice.rhodin at alice-dsl.net>
> + */
> +
> +/**
> + * \file llvm_passes.cpp
~~~~~~~~~~~~~~~ Wrong file name — this should be llvm_scalarize.cpp.
> + * \author Benjamin Segovia <benjamin.segovia at intel.com>
> + * \author Heldge RHodin <alice.rhodin at alice-dsl.net>
> + */
The author information above seems incorrect. As we discussed over IM,
I checked this link: https://code.google.com/p/lunarglass/source/browse/trunk/Core/Passes/Transforms/Scalarize.cpp?r=605
and found the following copyright statement. I suggest you keep that statement as-is and add the Intel statement on top of it.
//===- Scalarize.cpp - Scalarize LunarGLASS IR ----------------------------===//
//
// LunarGLASS: An Open Modular Shader Compiler Architecture
// Copyright (C) 2010-2011 LunarG, Inc.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; version 2 of the
// License.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
// 02110-1301, USA.
//
//===----------------------------------------------------------------------===//
//
// Author: Michael Ilseman, LunarG
//
//===----------------------------------------------------------------------===//
//
// Scalarize the IR.
// * Loads of uniforms become multiple loadComponent calls
//
// * Reads/writes become read/writeComponent calls
//
// * Component-wise operations become multiple ops over each component
//
// * Texture call become recomponsed texture calls
//
// * Vector ops disappear, with their users referring to the scalarized
// * components
//
//===----------------------------------------------------------------------===//
> +
> +/* THIS CODE IS DERIVED FROM GPL LLVM PTX BACKEND. CODE IS HERE:
> + * http://sourceforge.net/scm/?type=git&group_id=319085
> + * Note that however, the original author, Heldge Rhodin, granted me (Benjamin
> + * Segovia) the right to use another license for it (MIT here)
> + */
> +
> +#include "llvm/ADT/DenseMap.h"
> +#include "llvm/ADT/PostOrderIterator.h"
> +#include "llvm/Function.h"
> +#include "llvm/InstrTypes.h"
> +#include "llvm/Instructions.h"
> +#include "llvm/IntrinsicInst.h"
> +#include "llvm/Module.h"
> +#include "llvm/Pass.h"
> +#include "llvm/IRBuilder.h"
> +#include "llvm/Support/CallSite.h"
> +#include "llvm/Support/CFG.h"
> +#include "llvm/Support/raw_ostream.h"
> +
> +#include "llvm/llvm_gen_backend.hpp"
> +#include "ir/unit.hpp"
> +#include "sys/map.hpp"
> +
> +
> +using namespace llvm;
> +
> +namespace gbe {
> +
> + struct VectorValues {
> + VectorValues() : vals()
> + { }
> +
> + void setComponent(int c, llvm::Value* val)
> + {
> + assert(c >= 0 && c < 16 && "Out of bounds component");
> + vals[c] = val;
> + }
> + llvm::Value* getComponent(int c)
> + {
> + assert(c >= 0 && c < 16 && "Out of bounds component");
> + assert(vals[c] && "Requesting non-existing component");
> + return vals[c];
> + }
> +
> + // {Value* x, Value* y, Value* z, Value* w}
> + llvm::Value* vals[16];
> + };
> +
> + class Scalarize : public FunctionPass {
> +
> + public:
> + // Standard pass stuff
> + static char ID;
> +
> + Scalarize(ir::Unit& unit) : FunctionPass(ID), unit(unit)
> + {
> + initializeLoopInfoPass(*PassRegistry::getPassRegistry());
> + initializeDominatorTreePass(*PassRegistry::getPassRegistry());
> + }
> +
> + virtual bool runOnFunction(Function&);
> + void print(raw_ostream&, const Module* = 0) const;
> + virtual void getAnalysisUsage(AnalysisUsage&) const;
> +
> + protected:
> + // An instruction is valid post-scalarization iff it is fully scalar or it
> + // is a gla_loadn
> + bool isValid(const Instruction*);
> +
> + // Take an instruction that produces a vector, and scalarize it
> + bool scalarize(Instruction*);
> + bool scalarizePerComponent(Instruction*);
> + bool scalarizeFuncCall(CallInst *);
> + bool scalarizeLoad(LoadInst*);
> + bool scalarizeStore(StoreInst*);
> + //bool scalarizeIntrinsic(IntrinsicInst*);
> + bool scalarizeExtract(ExtractElementInst*);
> + bool scalarizeInsert(InsertElementInst*);
> + bool scalarizeShuffleVector(ShuffleVectorInst*);
> + bool scalarizePHI(PHINode*);
> + void scalarizeArgs(Function& F);
> + // ...
> +
> + // Helpers to make the actual multiple scalar calls, one per
> + // component. Updates the given VectorValues's components with the new
> + // Values.
> + void makeScalarizedCalls(Function*, ArrayRef<Value*>, int numComponents, VectorValues&);
> +
> + void makePerComponentScalarizedCalls(Instruction*, ArrayRef<Value*>);
> +
> + // Makes a scalar form of the given instruction: replaces the operands
> + // and chooses a correct return type
> + Instruction* createScalarInstruction(Instruction* inst, ArrayRef<Value*>);
> +
> + // Gather the specified components in the given values. Returns the
> + // component if the given value is a vector, or the scalar itself.
> + void gatherComponents(int component, ArrayRef<Value*> args, SmallVectorImpl<Value*>& componentArgs);
> +
> + // Get the assigned component for that value. If the value is a scalar,
> + // returns the scalar. If it's a constant, returns that component. If
> + // it's an instruction, returns the vectorValues of that instruction for
> + // that component
> + Value* getComponent(int component, Value*);
> +
> + // Used for assertion purposes. Whether we can get the component out with
> + // a getComponent call
> + bool canGetComponent(Value*);
> +
> + // Used for assertion purposes. Whether for every operand we can get
> + // components with a getComponent call
> + bool canGetComponentArgs(User*);
> +
> + // Delete the instruction in the deadList
> + void dce();
> +
> +
> + int GetConstantInt(const Value* value);
> + bool IsPerComponentOp(const Instruction* inst);
> + bool IsPerComponentOp(const Value* value);
> +
> + //these function used to add extract and insert instructions when load/store etc.
> + void extractFromeVector(Value* insn);
~~~~~~~~~~~~~~~~~~ Should this be extractFromVector? There is a stray 'e' between "From" and "Vector", which looks like a typo — right?
> + Value* InsertToVector(Value* insn, Value* vecValue);
> +
> + Type* GetBasicType(Value* value) {
> + return GetBasicType(value->getType());
> + }
> +
> + Type* GetBasicType(Type* type) {
> + switch(type->getTypeID()) {
> + case Type::VectorTyID:
> + case Type::ArrayTyID:
> + return GetBasicType(type->getContainedType(0));
> + default:
> + break;
> + }
> + return type;
> + }
> +
> + int GetComponentCount(const Type* type) {
> + if (type->getTypeID() == Type::VectorTyID)
> + return llvm::dyn_cast<VectorType>(type)->getNumElements();
> + else
> + return 1;
> + }
> +
> + int GetComponentCount(const Value* value) {
> + return GetComponentCount(value->getType());
> + }
> +
> + DenseMap<Value*, VectorValues> vectorVals;
> + Module* module;
> + IRBuilder<>* builder;
> +
> + Type* intTy;
> + Type* floatTy;
> + ir::Unit &unit;
> +
> + std::vector<Instruction*> deadList;
> +
> + // List of vector phis that were not completely scalarized because some
> + // of their operands hadn't before been visited (i.e. loop variant
> + // variables)
> + SmallVector<PHINode*, 16> incompletePhis;
> + };
> +
> + Value* Scalarize::getComponent(int component, Value* v)
> + {
> + assert(canGetComponent(v) && "getComponent called on unhandled vector");
> +
> + if (v->getType()->isVectorTy()) {
> + if (ConstantDataVector* c = dyn_cast<ConstantDataVector>(v)) {
> + return c->getElementAsConstant(component);
> + } else if (ConstantVector* c = dyn_cast<ConstantVector>(v)) {
> + return c->getOperand(component);
> + } else if (isa<ConstantAggregateZero>(v)) {
> + return Constant::getNullValue(GetBasicType(v));
> + } else if (isa<UndefValue>(v)) {
> + return UndefValue::get(GetBasicType(v));
> + } else {
> + return vectorVals[v].getComponent(component);
> + }
> + } else {
> + return v;
> + }
> + }
> +
> + bool IsPerComponentOp(const llvm::Value* value)
> + {
> + const llvm::Instruction* inst = llvm::dyn_cast<const llvm::Instruction>(value);
> + return inst && IsPerComponentOp(inst);
> + }
> +
> + bool Scalarize::IsPerComponentOp(const Instruction* inst)
> + {
> + //if (const IntrinsicInst* intr = dyn_cast<const IntrinsicInst>(inst))
> + // return IsPerComponentOp(intr);
> +
> + if (inst->isTerminator())
> + return false;
> +
> + switch (inst->getOpcode()) {
> +
> + // Cast ops are only per-component if they cast back to the same vector
> + // width
> + case Instruction::Trunc:
> + case Instruction::ZExt:
> + case Instruction::SExt:
> + case Instruction::FPToUI:
> + case Instruction::FPToSI:
> + case Instruction::UIToFP:
> + case Instruction::SIToFP:
> + case Instruction::FPTrunc:
> + case Instruction::FPExt:
> + case Instruction::PtrToInt:
> + case Instruction::IntToPtr:
> + case Instruction::BitCast:
> + return GetComponentCount(inst->getOperand(0)) == GetComponentCount(inst);
> +
> + // Vector ops
> + case Instruction::InsertElement:
> + case Instruction::ExtractElement:
> + case Instruction::ShuffleVector:
> +
> + // Ways of accessing/loading/storing vectors
> + case Instruction::ExtractValue:
> + case Instruction::InsertValue:
> +
> + // Memory ops
> + case Instruction::Alloca:
> + case Instruction::Load:
> + case Instruction::Store:
> + case Instruction::GetElementPtr:
> + // Phis are a little special. We consider them not to be per-component
> + // because the mechanism of choice is a single value (what path we took to
> + // get here), and doesn't choose per-component (as select would). The caller
> + // should know to handle phis specially
> + case Instruction::PHI:
> + // Call insts, conservatively are no per-component
> + case Instruction::Call:
> + // Misc
> + case Instruction::LandingPad: //--- 3.0
> + case Instruction::VAArg:
> + return false;
> + } // end of switch (inst->getOpcode())
> +
> + return true;
> + }
> + int Scalarize::GetConstantInt(const Value* value)
> + {
> + const ConstantInt *constantInt = dyn_cast<ConstantInt>(value);
> +
> + // this might still be a constant expression, rather than a numeric constant,
> + // e.g., expression with undef's in it, so it was not folded
> + if (! constantInt)
> + NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("non-simple constant");
> +
> + return constantInt->getValue().getSExtValue();
> + }
> + bool Scalarize::canGetComponent(Value* v)
> + {
> + if (v->getType()->isVectorTy()) {
> + if (isa<ConstantDataVector>(v) || isa<ConstantVector>(v) || isa<ConstantAggregateZero>(v) || isa<UndefValue>(v)) {
> + return true;
> + } else {
> + assert((isa<Instruction>(v) || isa<Argument>(v)) && "Non-constant non-instuction?");
> + return vectorVals.count(v);
> + }
> + } else {
> + return true;
> + }
> + }
> +
> + bool Scalarize::canGetComponentArgs(User* u)
> + {
> + if (PHINode* phi = dyn_cast<PHINode>(u)) {
> + for (unsigned int i = 0; i < phi->getNumIncomingValues(); ++i)
> + if (!canGetComponent(phi->getIncomingValue(i)))
> + return false;
> + } else {
> + for (User::op_iterator i = u->op_begin(), e = u->op_end(); i != e; ++i)
> + if (!canGetComponent(*i))
> + return false;
> + }
> + return true;
> + }
> +
> + void Scalarize::gatherComponents(int component, ArrayRef<Value*> args, SmallVectorImpl<Value*>& componentArgs)
> + {
> + componentArgs.clear();
> + for (ArrayRef<Value*>::iterator i = args.begin(), e = args.end(); i != e; ++i)
> + componentArgs.push_back(getComponent(component, *i));
> + }
> +
> + Instruction* Scalarize::createScalarInstruction(Instruction* inst, ArrayRef<Value*> args)
> + {
> + // TODO: Refine the below into one large switch
> +
> + unsigned op = inst->getOpcode();
> + if (inst->isCast()) {
> + assert(args.size() == 1 && "incorrect number of arguments for cast op");
> + return CastInst::Create((Instruction::CastOps)op, args[0], GetBasicType(inst));
> + }
> +
> + if (inst->isBinaryOp()) {
> + assert(args.size() == 2 && "incorrect number of arguments for binary op");
> + return BinaryOperator::Create((Instruction::BinaryOps)op, args[0], args[1]);
> + }
> +
> + if (PHINode* phi = dyn_cast<PHINode>(inst)) {
> + PHINode* res = PHINode::Create(GetBasicType(inst), phi->getNumIncomingValues());
> + assert(args.size() % 2 == 0 && "Odd number of arguments for a PHI");
> +
> + // Loop over pairs of operands: [Value*, BasicBlock*]
> + for (unsigned int i = 0; i < args.size(); i++) {
> + BasicBlock* bb = phi->getIncomingBlock(i); //dyn_cast<BasicBlock>(args[i+1]);
> + //assert(bb && "Non-basic block incoming block?");
> + res->addIncoming(args[i], bb);
> + }
> +
> + return res;
> + }
> +
> + if (CmpInst* cmpInst = dyn_cast<CmpInst>(inst)) {
> + assert(args.size() == 2 && "incorrect number of arguments for comparison");
> + return CmpInst::Create(cmpInst->getOpcode(), cmpInst->getPredicate(), args[0], args[1]);
> + }
> +
> + if (isa<SelectInst>(inst)) {
> + assert(args.size() == 3 && "incorrect number of arguments for select");
> + return SelectInst::Create(args[0], args[1], args[2]);
> + }
> +
> + if (IntrinsicInst* intr = dyn_cast<IntrinsicInst>(inst)) {
> + if (! IsPerComponentOp(inst))
> + NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Scalarize instruction on a non-per-component intrinsic");
> +
> + // TODO: Assumption is that all per-component intrinsics have all their
> + // arguments be overloadable. Need to find some way to assert on this
> + // assumption. This is due to how getDeclaration operates; it only takes
> + // a list of types that fit overloadable slots.
> + SmallVector<Type*, 8> tys(1, GetBasicType(inst->getType()));
> + // Call instructions have the decl as a last argument, so skip it
> + for (ArrayRef<Value*>::iterator i = args.begin(), e = args.end() - 1; i != e; ++i) {
> + tys.push_back(GetBasicType((*i)->getType()));
> + }
> +
> + Function* f = Intrinsic::getDeclaration(module, intr->getIntrinsicID(), tys);
> + return CallInst::Create(f, args);
> + }
> +
> + NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Currently unsupported instruction: ", inst->getOpcode(),
> + // inst->getOpcodeName());
> + return 0;
> +
> + }
> +
> +
> + void Scalarize::makeScalarizedCalls(Function* f, ArrayRef<Value*> args, int count, VectorValues& vVals)
> + {
> + assert(count > 0 && count <= 16 && "invalid number of vector components");
> + for (int i = 0; i < count; ++i) {
> + Value* res;
> + SmallVector<Value*, 8> callArgs(args.begin(), args.end());
> + callArgs.push_back(ConstantInt::get(intTy, i));
> +
> + res = builder->CreateCall(f, callArgs);
> + vVals.setComponent(i, res);
> + }
> + }
> +
void Scalarize::makePerComponentScalarizedCalls(Instruction* inst, ArrayRef<Value*> args)
{
  // Emit one scalar clone of 'inst' per vector component, insert each at
  // the builder's current position, and record the clones in vectorVals.
  // 'args' may be empty only for deferred (incomplete) PHIs, whose
  // operands are filled in later by runOnFunction.
  int count = GetComponentCount(inst);
  assert(count > 0 && count <= 16 && "invalid number of vector components");
  assert((inst->getNumOperands() == args.size() || isa<PHINode>(inst))
         && "not enough arguments passed for instruction");

  // NOTE(review): this reference is taken before gatherComponents /
  // getComponent may insert new vectorVals entries; if vectorVals is a
  // rehashing container the reference could be invalidated — verify the
  // map type guarantees reference stability.
  VectorValues& vVals = vectorVals[inst];

  for (int i = 0; i < count; ++i) {
    // Set this component of each arg
    SmallVector<Value*, 8> callArgs(args.size(), 0);
    gatherComponents(i, args, callArgs);

    Instruction* res = createScalarInstruction(inst, callArgs);

    vVals.setComponent(i, res);
    builder->Insert(res);
  }
}
> +
> + bool Scalarize::isValid(const Instruction* inst)
> + {
> + // The result
> + if (inst->getType()->isVectorTy())
> + return false;
> +
> + // The arguments
> + for (Instruction::const_op_iterator i = inst->op_begin(), e = inst->op_end(); i != e; ++i) {
> + const Value* v = (*i);
> + assert(v);
> + if (v->getType()->isVectorTy())
> + return false;
> + }
> +
> + return true;
> + }
> +
bool Scalarize::scalarize(Instruction* inst)
{
  // Dispatch one instruction to the matching scalarization routine.
  // Returns true iff 'inst' was fully replaced and may be pushed onto the
  // dead list by the caller.
  if (isValid(inst))
    return false; // purely scalar already — nothing to do

  assert(! vectorVals.count(inst) && "We've already scalarized this somehow?");
  assert((canGetComponentArgs(inst) || isa<PHINode>(inst)) &&
         "Scalarizing an op whose arguments haven't been scalarized ");
  builder->SetInsertPoint(inst);

  // Order matters: the per-component test runs first, so only ops it
  // rejects (loads, stores, calls, shuffles, phis, ...) reach the
  // type-specific checks below.
  if (IsPerComponentOp(inst))
    return scalarizePerComponent(inst);

  if (LoadInst* ld = dyn_cast<LoadInst>(inst))
    return scalarizeLoad(ld);

  if (CallInst* call = dyn_cast<CallInst>(inst))
    return scalarizeFuncCall(call);

  if (ExtractElementInst* extr = dyn_cast<ExtractElementInst>(inst))
    return scalarizeExtract(extr);

  if (InsertElementInst* ins = dyn_cast<InsertElementInst>(inst))
    return scalarizeInsert(ins);

  if (ShuffleVectorInst* sv = dyn_cast<ShuffleVectorInst>(inst))
    return scalarizeShuffleVector(sv);

  if (PHINode* phi = dyn_cast<PHINode>(inst))
    return scalarizePHI(phi);

  if (isa<ExtractValueInst>(inst) || isa<InsertValueInst>(inst))
    // TODO: need to come up with a struct/array model for scalarization
    NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Scalarizing struct/array ops");

  if (StoreInst* st = dyn_cast<StoreInst>(inst))
    return scalarizeStore(st);

  NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Currently unhandled instruction ", inst->getOpcode(), inst->getOpcodeName());
  return false;
}
> +
bool Scalarize::scalarizeShuffleVector(ShuffleVectorInst* sv)
{
  // %res = shuffleVector <n x ty> %foo, <n x ty> bar, <n x i32> <...>
  // ==> nothing (just make a new VectorValues with the new components)
  //
  // No code is emitted: each result component is wired directly to the
  // already-scalarized component of the selected source operand.
  VectorValues& vVals = vectorVals[sv];

  int size = GetComponentCount(sv);
  // Mask indices in [0, srcSize) pick from operand 0; indices in
  // [srcSize, 2*srcSize) pick from operand 1.
  int srcSize = GetComponentCount(sv->getOperand(0)->getType());

  for (int i = 0; i < size; ++i) {
    int select = sv->getMaskValue(i);

    // A negative mask value means "undef" for this lane.
    if (select < 0) {
      vVals.setComponent(i, UndefValue::get(GetBasicType(sv->getOperand(0))));
      continue;
    }

    // Otherwise look up the corresponding component from the correct
    // source.
    Value* selectee;
    if (select < srcSize) {
      selectee = sv->getOperand(0);
    } else {
      // Choose from the second operand
      select -= srcSize;
      selectee = sv->getOperand(1);
    }

    vVals.setComponent(i, getComponent(select, selectee));
  }

  return true;
}
> +
> + bool Scalarize::scalarizePerComponent(Instruction* inst)
> + {
> + // dst = op <n x ty> %foo, <n x ty> %bar
> + // ==> dstx = op ty %foox, ty %barx
> + // dsty = op ty %fooy, ty %bary
> + // ...
> +
> + SmallVector<Value*, 16> args(inst->op_begin(), inst->op_end());
> +
> + makePerComponentScalarizedCalls(inst, args);
> +
> + return true;
> + }
> +
bool Scalarize::scalarizePHI(PHINode* phi)
{
  // dst = phi <n x ty> [ %foo, %bb1 ], [ %bar, %bb2], ...
  // ==> dstx = phi ty [ %foox, %bb1 ], [ %barx, %bb2], ...
  //     dsty = phi ty [ %fooy, %bb1 ], [ %bary, %bb2], ...

  // If the scalar values are all known up-front, then just make the full
  // phinode now. If they are not yet known (phinode for a loop variant
  // variable), then defer the arguments until later: empty phis are
  // created here and their incoming edges are added in runOnFunction
  // after every incoming value has been scalarized.

  if (canGetComponentArgs(phi)) {
    SmallVector<Value*, 8> args(phi->op_begin(), phi->op_end());
    makePerComponentScalarizedCalls(phi, args);
  } else {
    makePerComponentScalarizedCalls(phi, ArrayRef<Value*>());
    incompletePhis.push_back(phi);
  }

  return true;
}
> +
  // TODO(review): name typo — should read "extractFromVector"; renaming
  // touches all call sites, so it is left for a follow-up patch.
  void Scalarize::extractFromeVector(Value* insn) {
  // Emit one fake extractelement per component of 'insn', register each as
  // that component's scalar value, and record a value proxy in the unit so
  // the backend can map the fake instruction back to the real vector.
  VectorValues& vVals = vectorVals[insn];

  for (int i = 0; i < GetComponentCount(insn); ++i) {
    Value *cv = ConstantInt::get(intTy, i);
    Value *EI = builder->CreateExtractElement(insn, cv);
    vVals.setComponent(i, EI);
    //unit.fakeInsnMap[EI] = insn;
    unit.newValueProxy(insn, EI, i, 0);
  }
}
> +
> + Value* Scalarize::InsertToVector(Value * insn, Value* vecValue) {
> + //VectorValues& vVals = vectorVals[writeValue];
> + //unit.vecValuesMap[call] = vectorVals[writeValue];
> +
> + //add fake insert instructions to avoid removed
> + Value *II = NULL;
> + for (int i = 0; i < GetComponentCount(vecValue); ++i) {
> + Value *vec = II ? II : UndefValue::get(vecValue->getType());
> + Value *cv = ConstantInt::get(intTy, i);
> + II = builder->CreateInsertElement(vec, getComponent(i, vecValue), cv);
> + //unit.vecValuesMap[insn].setComponent(i, getComponent(i, writeValue));
> + //unit.newValueProxy(getComponent(i, vecValue), vecValue, 0, i);
> + //unit.fakeInsnMap[II] = insn;
> + }
> +
> + for (int i = 0; i < GetComponentCount(vecValue); ++i) {
> + unit.newValueProxy(getComponent(i, vecValue), II, 0, i);
> + }
> + return II;
> + }
> +
bool Scalarize::scalarizeFuncCall(CallInst* call) {
  // Handle calls that produce or consume vectors. Only the OCL image
  // read/write builtins are treated: reads get fake extractelements for
  // their vector result; writes get their vector argument rebuilt through
  // fake insertelements. All other calls pass through untouched.
  // Always returns false: the call itself is never deleted.
  if (Function *F = call->getCalledFunction()) {
    if (F->getIntrinsicID() != 0) { //Intrinsic functions
      NOT_IMPLEMENTED;
    } else {
      Value *Callee = call->getCalledValue();
      const std::string fnName = Callee->getName();
      auto it = instrinsicMap.map.find(fnName);
      GBE_ASSERT(it != instrinsicMap.map.end());

      // Get the function arguments
      CallSite CS(call);
      // NOTE(review): assumes the vector payload of the write builtins is
      // the 4th argument — confirm against the builtin declarations.
      CallSite::arg_iterator CI = CS.arg_begin() + 3;

      switch (it->second) {
      default: break;
      case GEN_OCL_READ_IMAGE0:
      case GEN_OCL_READ_IMAGE1:
      case GEN_OCL_READ_IMAGE2:
      case GEN_OCL_READ_IMAGE3:
      case GEN_OCL_READ_IMAGE4:
      case GEN_OCL_READ_IMAGE5:
      case GEN_OCL_READ_IMAGE10:
      case GEN_OCL_READ_IMAGE11:
      case GEN_OCL_READ_IMAGE12:
      case GEN_OCL_READ_IMAGE13:
      case GEN_OCL_READ_IMAGE14:
      case GEN_OCL_READ_IMAGE15:
      {
        extractFromeVector(call);
        break;
      }
      case GEN_OCL_WRITE_IMAGE10:
      case GEN_OCL_WRITE_IMAGE11:
      case GEN_OCL_WRITE_IMAGE12:
      case GEN_OCL_WRITE_IMAGE13:
      case GEN_OCL_WRITE_IMAGE14:
      case GEN_OCL_WRITE_IMAGE15:
        // These variants carry one extra leading argument, so step past it
        // before the shared handling below.
        CI++;
        /* intentional fall through */
      case GEN_OCL_WRITE_IMAGE0:
      case GEN_OCL_WRITE_IMAGE1:
      case GEN_OCL_WRITE_IMAGE2:
      case GEN_OCL_WRITE_IMAGE3:
      case GEN_OCL_WRITE_IMAGE4:
      case GEN_OCL_WRITE_IMAGE5:
      {
        *CI = InsertToVector(call, *CI);
        break;
      }
      }
    }
  }
  return false;
}
> +
bool Scalarize::scalarizeLoad(LoadInst* ld)
{
  // Keep the vector load intact: emit fake extractelements so each
  // component becomes individually addressable by scalarized users.
  // Returns false so the load is never placed on the dead list.
  extractFromeVector(ld);
  return false;
}
> +
bool Scalarize::scalarizeStore(StoreInst* st) {
  // Keep the vector store intact, but replace the stored value with a
  // freshly-built chain of fake insertelements so the scalar components
  // stay live. Returns false: the store itself is never deleted.
  st->setOperand(0, InsertToVector(st, st->getValueOperand()));
  return false;
}
> +
> + bool Scalarize::scalarizeExtract(ExtractElementInst* extr)
> + {
> + // %res = extractelement <n X ty> %foo, %i
> + // ==> nothing (just use %foo's %ith component instead of %res)
> +
> + if (! isa<Constant>(extr->getOperand(1))) {
> + // TODO: Variably referenced components. Probably handle/emulate through
> + // a series of selects.
> + NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Variably referenced vector components");
> + }
> + //if (isa<Argument>(extr->getOperand(0)))
> + // return false;
> + int component = GetConstantInt(extr->getOperand(1));
> + Value* v = getComponent(component, extr->getOperand(0));
> + if(extr == v)
> + return false;
> + extr->replaceAllUsesWith(v);
> +
> + return true;
> + }
> +
bool Scalarize::scalarizeInsert(InsertElementInst* ins)
{
  // %res = insertelement <n x ty> %foo, ty %elt, i32 %i
  // ==> nothing (just make a new VectorValues with the new component)

  if (! isa<Constant>(ins->getOperand(2))) {
    // TODO: Variably referenced components. Probably handle/emulate through
    // a series of selects.
    NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Variably referenced vector components");
  }

  int component = GetConstantInt(ins->getOperand(2));

  // Component i of the result is the inserted element where i matches the
  // constant index, otherwise it is inherited from the source vector.
  VectorValues& vVals = vectorVals[ins];
  for (int i = 0; i < GetComponentCount(ins); ++i) {
    vVals.setComponent(i, i == component ? ins->getOperand(1)
                       : getComponent(i, ins->getOperand(0)));
  }

  return true;
}
> +
> + void Scalarize::scalarizeArgs(Function& F) {
> + if (F.arg_empty())
> + return;
> + ReversePostOrderTraversal<Function*> rpot(&F);
> + BasicBlock::iterator instI = (*rpot.begin())->begin();
> + builder->SetInsertPoint(instI);
> +
> + Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
> +
> +#if LLVM_VERSION_MINOR <= 1
> + const AttrListPtr &PAL = F.getAttributes();
> + uint32_t argID = 1; // Start at one actually
> + for (; I != E; ++I, ++argID) {
> +#else
> + for (; I != E; ++I) {
> +#endif /* LLVM_VERSION_MINOR <= 1 */
> + Type *type = I->getType();
> +
> + if(type->isVectorTy())
> + extractFromeVector(I);
> + }
> + return;
> + }
> +
bool Scalarize::runOnFunction(Function& F)
{
  // Pass entry point: scalarize every instruction of a kernel in reverse
  // post-order, then patch up deferred phi operands and delete the
  // replaced vector instructions. Returns true if the IR changed.
  switch (F.getCallingConv()) {
  case CallingConv::PTX_Device:
    // Non-kernel device functions are left untouched.
    return false;
  case CallingConv::PTX_Kernel:
    break;
  default: GBE_ASSERTM(false, "Unsupported calling convention");
  }

  bool changed = false;
  module = F.getParent();
  intTy = IntegerType::get(module->getContext(), 32);
  floatTy = Type::getFloatTy(module->getContext());
  // NOTE(review): raw new/delete — 'builder' leaks if anything below
  // throws; a stack-allocated IRBuilder would be safer.
  builder = new IRBuilder<>(module->getContext());

  scalarizeArgs(F);

  // RPO visits defs before uses (except across loop back-edges), so most
  // operands are already scalarized when their users are reached; the
  // exceptions become "incomplete phis" handled below.
  typedef ReversePostOrderTraversal<Function*> RPOTType;
  RPOTType rpot(&F);
  for (RPOTType::rpo_iterator bbI = rpot.begin(), bbE = rpot.end(); bbI != bbE; ++bbI) {
    for (BasicBlock::iterator instI = (*bbI)->begin(), instE = (*bbI)->end(); instI != instE; ++instI) {
      bool scalarized = scalarize(instI);
      if (scalarized) {
        changed = true;
        // TODO: uncomment when done
        deadList.push_back(instI);
      }
    }
  }

  // Fill in the incomplete phis (loop-carried values whose operands were
  // not yet scalarized when the phi itself was visited)
  for (SmallVectorImpl<PHINode*>::iterator phiI = incompletePhis.begin(), phiE = incompletePhis.end();
       phiI != phiE; ++phiI) {
    assert(canGetComponentArgs(*phiI) && "Phi's operands never scalarized");

    // Fill in each component of this phi
    VectorValues& vVals = vectorVals[*phiI];
    for (int c = 0; c < GetComponentCount(*phiI); ++c) {
      PHINode* compPhi = dyn_cast<PHINode>(vVals.getComponent(c));
      assert(compPhi && "Vector phi got scalarized to non-phis?");

      // For each incoming edge, wire the c-th component of the incoming
      // value to the corresponding incoming block.
      for (unsigned int i = 0; i < (*phiI)->getNumOperands(); i++) {
        BasicBlock* bb = (*phiI)->getIncomingBlock(i);
        assert(bb && "Non-basic block incoming block?");
        compPhi->addIncoming(getComponent(c, (*phiI)->getOperand(i)), bb);
      }
    }
  }

  dce();

  delete builder;
  builder = 0;

  return changed;
}
> +
void Scalarize::dce()
{
  //two passes delete for some phinode
  // Pass 1 (reverse order): drop all operand references first — this
  // breaks the use cycles phi nodes can form — then erase any instruction
  // that is already free of uses.
  for (std::vector<Instruction*>::reverse_iterator i = deadList.rbegin(), e = deadList.rend(); i != e; ++i) {
    (*i)->dropAllReferences();
    if((*i)->use_empty())
      (*i)->eraseFromParent();
  }
  // Pass 2: erase the survivors now that the cycles are broken; a non-null
  // parent identifies instructions not yet erased in pass 1.
  for (std::vector<Instruction*>::reverse_iterator i = deadList.rbegin(), e = deadList.rend(); i != e; ++i) {
    if((*i)->getParent())
      (*i)->eraseFromParent();
  }
  deadList.clear();
}
> +
void Scalarize::getAnalysisUsage(AnalysisUsage& AU) const
{
  // Intentionally empty: this pass requires no analyses and declares
  // nothing preserved (it rewrites instructions throughout the function).
}
> +
> + void Scalarize::print(raw_ostream&, const Module*) const
> + {
> + return;
> + }
// Factory used by the pass manager: creates a Scalarize pass bound to the
// given GBE IR unit (which collects the fake-instruction value proxies).
FunctionPass* createScalarizePass(ir::Unit &unit)
{
  return new Scalarize(unit);
}
char Scalarize::ID = 0; // Pass identification, replacement for typeid
> +
> +} // end namespace
> diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
> index ea3d9eb..559cde0 100644
> --- a/backend/src/llvm/llvm_to_gen.cpp
> +++ b/backend/src/llvm/llvm_to_gen.cpp
> @@ -69,6 +69,7 @@ namespace gbe
> // Print the code before further optimizations
> if (OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS)
> passes.add(createPrintModulePass(&*o));
> + passes.add(createScalarizePass(unit)); // Expand all vector ops
> passes.add(createScalarReplAggregatesPass()); // Break up allocas
> passes.add(createRemoveGEPPass(unit));
> passes.add(createConstantPropagationPass());
> --
> 1.7.9.5
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list