[Beignet] [PATCH 1/3] Add a scalarize llvm pass.
Zhigang Gong
zhigang.gong at linux.intel.com
Thu May 16 22:53:48 PDT 2013
Some minor comments below. In general, this patchset is really nice — we now have cleaner code to handle vectors. Thanks.
On Thu, May 16, 2013 at 12:36:33PM +0800, Yang Rong wrote:
> In this pass, expand all normal vector ops to scalar ops, except store/load, image read/write and function's argument. Add fake ExtractElement/InsertElement instructions to avoid dead instruction elimination, and unit valueMap hold the relationship between these fake instructions and real load/store instructions.
>
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
> backend/src/CMakeLists.txt | 1 +
> backend/src/ir/unit.hpp | 22 +-
> backend/src/llvm/llvm_gen_backend.cpp | 241 +++-------
> backend/src/llvm/llvm_gen_backend.hpp | 30 +-
> backend/src/llvm/llvm_scalarize.cpp | 801 +++++++++++++++++++++++++++++++++
> backend/src/llvm/llvm_to_gen.cpp | 1 +
> 6 files changed, 914 insertions(+), 182 deletions(-)
> create mode 100644 backend/src/llvm/llvm_scalarize.cpp
>
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
> index 1829964..183517a 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -83,6 +83,7 @@ else (GBE_USE_BLOB)
> backend/program.h
> llvm/llvm_gen_backend.cpp
> llvm/llvm_passes.cpp
> + llvm/llvm_scalarize.cpp
> llvm/llvm_to_gen.cpp
> llvm/llvm_gen_backend.hpp
> llvm/llvm_gen_ocl_function.hxx
> diff --git a/backend/src/ir/unit.hpp b/backend/src/ir/unit.hpp
> index ae78638..3b293f5 100644
> --- a/backend/src/ir/unit.hpp
> +++ b/backend/src/ir/unit.hpp
> @@ -1,4 +1,4 @@
> -/*
> +/*
> * Copyright © 2012 Intel Corporation
> *
> * This library is free software; you can redistribute it and/or
> @@ -24,9 +24,12 @@
> #ifndef __GBE_IR_UNIT_HPP__
> #define __GBE_IR_UNIT_HPP__
>
> +#include "llvm/Value.h"
> +
> #include "ir/constant.hpp"
> #include "ir/register.hpp"
> #include "sys/hash_map.hpp"
> +#include "sys/map.hpp"
>
> namespace gbe {
> namespace ir {
> @@ -41,6 +44,7 @@ namespace ir {
> {
> public:
> typedef hash_map<std::string, Function*> FunctionSet;
> + typedef std::pair<llvm::Value*, uint32_t> ValueIndex;
> /*! Create an empty unit */
> Unit(PointerSize pointerSize = POINTER_32_BITS);
> /*! Release everything (*including* the function pointers) */
> @@ -71,11 +75,27 @@ namespace ir {
> ConstantSet& getConstantSet(void) { return constantSet; }
> /*! Return the constant set */
> const ConstantSet& getConstantSet(void) const { return constantSet; }
> +
> + /*! Some values will not be allocated. For example a vector extract and
> + * a vector insertion when scalarize the vector load/store
> + */
> + void newValueProxy(llvm::Value *real,
> + llvm::Value *fake,
> + uint32_t realIndex = 0u,
> + uint32_t fakeIndex = 0u) {
> + const ValueIndex key(fake, fakeIndex);
> + const ValueIndex value(real, realIndex);
> + GBE_ASSERT(valueMap.find(key) == valueMap.end()); // Do not insert twice
> + valueMap[key] = value;
> + }
> + /*! Return the value map */
> + const map<ValueIndex, ValueIndex>& getValueMap(void) const { return valueMap; }
> private:
> friend class ContextInterface; //!< Can free modify the unit
> hash_map<std::string, Function*> functions; //!< All the defined functions
> ConstantSet constantSet; //!< All the constants defined in the unit
> PointerSize pointerSize; //!< Size shared by all pointers
> + map<ValueIndex, ValueIndex> valueMap; //!< fake to real value map for vector load/store
> GBE_CLASS(Unit);
> };
>
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 8dcf15c..3855011 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -1,4 +1,4 @@
> -/*
> +/*
> * Copyright © 2012 Intel Corporation
> *
> * This library is free software; you can redistribute it and/or
> @@ -60,7 +60,7 @@
> * dependencies on endianness or ABIs. Fortunately, the ptx (and nvptx for LLVM
> * 3.2) profile is pretty well adapted to our needs since NV and Gen GPU are
> * kind of similar, or at least they are similar enough to share the same front
> - * end.
> + * end.
> *
> * Problems
> * ========
> @@ -126,10 +126,8 @@
> #include "ir/context.hpp"
> #include "ir/unit.hpp"
> #include "ir/liveness.hpp"
> -#include "sys/map.hpp"
> #include "sys/set.hpp"
> #include "sys/cvar.hpp"
> -#include <algorithm>
>
> /* Not defined for LLVM 3.0 */
> #if !defined(LLVM_VERSION_MAJOR)
> @@ -207,7 +205,7 @@ namespace gbe
> /*! Type to register family translation */
> static ir::RegisterFamily getFamily(const ir::Context &ctx, const Type *type)
> {
> - GBE_ASSERT(isScalarType(type) == true);
> + GBE_ASSERT(isScalarType(type) == true);
> if (type == Type::getInt1Ty(type->getContext()))
> return ir::FAMILY_BOOL;
> if (type == Type::getInt8Ty(type->getContext()))
> @@ -269,6 +267,8 @@ namespace gbe
> class RegisterTranslator
> {
> public:
> + /*! Indices will be zero for scalar values */
> + typedef std::pair<Value*, uint32_t> ValueIndex;
> RegisterTranslator(ir::Context &ctx) : ctx(ctx) {}
>
> /*! Empty the maps */
> @@ -289,6 +289,11 @@ namespace gbe
> GBE_ASSERT(valueMap.find(key) == valueMap.end()); // Do not insert twice
> valueMap[key] = value;
> }
> + /*! After scalarize pass, there are some valueMap in unit,
> + * use this function to copy from unit valueMap */
> + void initValueMap(const map<ValueIndex, ValueIndex>& vMap) {
> + valueMap.insert(vMap.begin(), vMap.end());
> + }
> /*! Mostly used for the preallocated registers (lids, gids) */
> void newScalarProxy(ir::Register reg, Value *value, uint32_t index = 0u) {
> const ValueIndex key(value, index);
> @@ -325,10 +330,9 @@ namespace gbe
> };
> return ir::Register();
> }
> - /*! Get the register from the given value at given index possibly iterating
> - * in the value map to get the final real register
> - */
> - ir::Register getScalar(Value *value, uint32_t index = 0u) {
> +
> + /*! iterating in the value map to get the final real register */
> + void getRealValue(Value* &value, uint32_t& index) {
> auto end = valueMap.end();
> for (;;) {
> auto it = valueMap.find(std::make_pair(value, index));
> @@ -339,6 +343,14 @@ namespace gbe
> index = it->second.second;
> }
> }
> + }
> +
> + /*! Get the register from the given value at given index possibly iterating
> + * in the value map to get the final real register
> + */
> + ir::Register getScalar(Value *value, uint32_t index = 0u) {
> + getRealValue(value, index);
> +
> const auto key = std::make_pair(value, index);
> GBE_ASSERT(scalarMap.find(key) != scalarMap.end());
> return scalarMap[key];
> @@ -351,16 +363,8 @@ namespace gbe
> }
> /*! Says if the value exists. Otherwise, it is undefined */
> bool valueExists(Value *value, uint32_t index) {
> - auto end = valueMap.end();
> - for (;;) {
> - auto it = valueMap.find(std::make_pair(value, index));
> - if (it == end)
> - break;
> - else {
> - value = it->second.first;
> - index = it->second.second;
> - }
> - }
> + getRealValue(value, index);
> +
> const auto key = std::make_pair(value, index);
> return scalarMap.find(key) != scalarMap.end();
> }
> @@ -375,8 +379,6 @@ namespace gbe
> this->insertRegister(reg, key, index);
> return reg;
> }
> - /*! Indices will be zero for scalar values */
> - typedef std::pair<Value*, uint32_t> ValueIndex;
> /*! Map value to ir::Register */
> map<ValueIndex, ir::Register> scalarMap;
> /*! Map values to values when this is only a translation (eq bitcast) */
> @@ -384,28 +386,6 @@ namespace gbe
> /*! Actually allocates the registers */
> ir::Context &ctx;
> };
> - /*! All intrinsic Gen functions */
> - enum OCLInstrinsic {
> -#define DECL_LLVM_GEN_FUNCTION(ID, NAME) GEN_OCL_##ID,
> -#include "llvm_gen_ocl_function.hxx"
> -#undef DECL_LLVM_GEN_FUNCTION
> - };
> -
> - /*! Build the hash map for OCL functions on Gen */
> - struct OCLIntrinsicMap {
> - /*! Build the intrinsic hash map */
> - OCLIntrinsicMap(void) {
> -#define DECL_LLVM_GEN_FUNCTION(ID, NAME) \
> - map.insert(std::make_pair(#NAME, GEN_OCL_##ID));
> -#include "llvm_gen_ocl_function.hxx"
> -#undef DECL_LLVM_GEN_FUNCTION
> - }
> - /*! Sort intrinsics with their names */
> - hash_map<std::string, OCLInstrinsic> map;
> - };
> -
> - /*! Sort the OCL Gen instrinsic functions (built on pre-main) */
> - static const OCLIntrinsicMap instrinsicMap;
>
> /*! Translate LLVM IR code to Gen IR code */
> class GenWriter : public FunctionPass, public InstVisitor<GenWriter>
> @@ -423,7 +403,7 @@ namespace gbe
> */
> set<const Value*> conditionSet;
> /*! We visit each function twice. Once to allocate the registers and once to
> - * emit the Gen IR instructions
> + * emit the Gen IR instructions
> */
> enum Pass {
> PASS_EMIT_REGISTERS = 0,
> @@ -663,7 +643,7 @@ namespace gbe
> if (dyn_cast<ConstantAggregateZero>(CPV)) {
> return doIt(uint32_t(0)); // XXX Handle type
> } else {
> - if (dyn_cast<ConstantVector>(CPV))
> + if (dyn_cast<ConstantVector>(CPV))
> CPV = extractConstantElem(CPV, index);
> GBE_ASSERTM(dyn_cast<ConstantExpr>(CPV) == NULL, "Unsupported constant expression");
>
> @@ -756,6 +736,9 @@ namespace gbe
> }
>
> ir::Register GenWriter::getRegister(Value *value, uint32_t elemID) {
> + //the real value may be constant, so get real value before constant check
> + regTranslator.getRealValue(value, elemID);
> +
> if (dyn_cast<ConstantExpr>(value)) {
> ConstantExpr *ce = dyn_cast<ConstantExpr>(value);
> if(ce->isCast()) {
> @@ -867,6 +850,7 @@ namespace gbe
> "Returned value for kernel functions is forbidden");
> // Loop over the arguments and output registers for them
> if (!F.arg_empty()) {
> + uint32_t argID = 0;
> Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
>
> // Insert a new register for each function argument
> @@ -875,10 +859,33 @@ namespace gbe
> uint32_t argID = 1; // Start at one actually
> for (; I != E; ++I, ++argID) {
> #else
> - for (; I != E; ++I) {
> + for (; I != E; ++I, ++argID) {
> #endif /* LLVM_VERSION_MINOR <= 1 */
> const std::string &argName = I->getName().str();
> Type *type = I->getType();
> +
> + //add support for vector argument
> + if(type->isVectorTy()) {
> + VectorType *vectorType = cast<VectorType>(type);
> +
> + this->newRegister(I);
> + ir::Register reg = getRegister(I, 0);
> +
> + Type *elemType = vectorType->getElementType();
> + const uint32_t elemSize = getTypeByteSize(unit, elemType);
> + const uint32_t elemNum = vectorType->getNumElements();
> + //vector's elemType always scalar type
> + ctx.input(argName, ir::FunctionArgument::VALUE, reg, elemNum*elemSize);
> +
> + ir::Function& fn = ctx.getFunction();
> + for(uint32_t i=1; i < elemNum; i++) {
> + ir::PushLocation argLocation(fn, argID, elemSize*i);
> + reg = getRegister(I, i);
> + ctx.appendPushedConstant(reg, argLocation); //add to push map for reg alloc
> + }
> + continue;
> + }
> +
> GBE_ASSERTM(isScalarType(type) == true,
> "vector type in the function argument is not supported yet");
> const ir::Register reg = regTranslator.newScalar(I);
> @@ -916,7 +923,6 @@ namespace gbe
> ctx.input(argName, ir::FunctionArgument::IMAGE, reg, ptrSize);
> ctx.getFunction().getImageSet()->append(reg, &ctx);
> break;
> - break;
> default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE);
> }
> }
> @@ -1141,6 +1147,7 @@ namespace gbe
>
> ctx.startFunction(F.getName());
> this->regTranslator.clear();
> + this->regTranslator.initValueMap(unit.getValueMap());
> this->labelMap.clear();
> this->emitFunctionPrototype(F);
>
> @@ -1495,141 +1502,15 @@ namespace gbe
> ir::Context &ctx;
> };
>
> - void GenWriter::regAllocateInsertElement(InsertElementInst &I) {
> - Value *modified = I.getOperand(0);
> - Value *toInsert = I.getOperand(1);
> - Value *index = I.getOperand(2);
> -
> - // Get the index for the insertion
> - Constant *CPV = dyn_cast<Constant>(index);
> - GBE_ASSERTM(CPV != NULL, "only constant indices when inserting values");
> - auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
> - GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32,
> - "Invalid index type for InsertElement");
> -
> - // Crash on overrun
> - VectorType *vectorType = cast<VectorType>(modified->getType());
> - const uint32_t elemNum = vectorType->getNumElements();
> - const uint32_t modifiedID = x.data.u32;
> - GBE_ASSERTM(modifiedID < elemNum, "Out-of-bound index for InsertElement");
> -
> - // The source vector is not constant
> - if (!isa<Constant>(modified) || isa<UndefValue>(modified)) {
> - // Non modified values are just proxies
> - for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
> - if (elemID != modifiedID)
> - regTranslator.newValueProxy(modified, &I, elemID, elemID);
> - }
> - // The source vector is constant
> - else {
> - // Non modified values will use LOADI
> - for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
> - if (elemID != modifiedID) {
> - const ir::Type type = getType(ctx, toInsert->getType());
> - const ir::Register reg = ctx.reg(getFamily(type));
> - regTranslator.insertRegister(reg, &I, elemID);
> - }
> - }
> -
> - // If the element to insert is an immediate we will generate a LOADI.
> - // Otherwise, the value is just a proxy of the inserted value
> - if (dyn_cast<Constant>(toInsert) != NULL) {
> - const ir::Type type = getType(ctx, toInsert->getType());
> - const ir::Register reg = ctx.reg(getFamily(type));
> - regTranslator.insertRegister(reg, &I, modifiedID);
> - } else
> - regTranslator.newValueProxy(toInsert, &I, 0, modifiedID);
> - }
> -
> - void GenWriter::emitInsertElement(InsertElementInst &I) {
> - // Note that we check everything in regAllocateInsertElement
> - Value *modified = I.getOperand(0);
> - Value *toInsert = I.getOperand(1);
> - Value *index = I.getOperand(2);
> -
> - // Get the index of the value to insert
> - Constant *indexCPV = dyn_cast<Constant>(index);
> - auto x = processConstant<ir::Immediate>(indexCPV, InsertExtractFunctor(ctx));
> - const uint32_t modifiedID = x.data.u32;
> -
> - // The source vector is constant. We need to insert LOADI for the unmodified
> - // values
> - if (isa<Constant>(modified) && !isa<UndefValue>(modified)) {
> - VectorType *vectorType = cast<VectorType>(modified->getType());
> - const uint32_t elemNum = vectorType->getNumElements();
> - for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
> - if (elemID != modifiedID) {
> - Constant *sourceCPV = dyn_cast<Constant>(modified);
> - if (isa<UndefValue>(extractConstantElem(sourceCPV, elemID)) == false) {
> - const ir::ImmediateIndex immIndex = this->newImmediate(sourceCPV, elemID);
> - const ir::Immediate imm = ctx.getImmediate(immIndex);
> - const ir::Register reg = regTranslator.getScalar(&I, elemID);
> - ctx.LOADI(imm.type, reg, immIndex);
> - }
> - }
> - }
> -
> - // If the inserted value is not a constant, we just use a proxy
> - if (dyn_cast<Constant>(toInsert) == NULL)
> - return;
> -
> - // We need a LOADI if we insert an immediate
> - Constant *toInsertCPV = dyn_cast<Constant>(toInsert);
> - const ir::ImmediateIndex immIndex = this->newImmediate(toInsertCPV);
> - const ir::Immediate imm = ctx.getImmediate(immIndex);
> - const ir::Register reg = regTranslator.getScalar(&I, modifiedID);
> - ctx.LOADI(imm.type, reg, immIndex);
> - }
> -
> - void GenWriter::regAllocateExtractElement(ExtractElementInst &I) {
> - Value *extracted = I.getOperand(0);
> - Value *index = I.getOperand(1);
> - GBE_ASSERTM(isa<Constant>(extracted) == false,
> - "TODO support constant vector for extract");
> - Constant *CPV = dyn_cast<Constant>(index);
> - GBE_ASSERTM(CPV != NULL, "only constant indices when inserting values");
> - auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
> - GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32,
> - "Invalid index type for InsertElement");
> -
> - // Crash on overrun
> - const uint32_t extractedID = x.data.u32;
> -#if GBE_DEBUG
> - VectorType *vectorType = cast<VectorType>(extracted->getType());
> - const uint32_t elemNum = vectorType->getNumElements();
> - GBE_ASSERTM(extractedID < elemNum, "Out-of-bound index for InsertElement");
> -#endif /* GBE_DEBUG */
> -
> - // Easy when the vector is not immediate
> - regTranslator.newValueProxy(extracted, &I, extractedID, 0);
> - }
> -
> - void GenWriter::emitExtractElement(ExtractElementInst &I) {
> - // TODO -> insert LOADI when the extracted vector is constant
> - }
> + /*! Because there are still fake insert/extract instruction for
> + * load/store, so keep empty function here */
> + void GenWriter::regAllocateInsertElement(InsertElementInst &I) {}
> + void GenWriter::emitInsertElement(InsertElementInst &I) {}
>
> - void GenWriter::regAllocateShuffleVectorInst(ShuffleVectorInst &I) {
> - Value *first = I.getOperand(0);
> - Value *second = I.getOperand(1);
> - GBE_ASSERTM(!isa<Constant>(first) || isa<UndefValue>(first),
> - "TODO support constant vector for shuffle");
> - GBE_ASSERTM(!isa<Constant>(second) || isa<UndefValue>(second),
> - "TODO support constant vector for shuffle");
> - VectorType *dstType = cast<VectorType>(I.getType());
> - VectorType *srcType = cast<VectorType>(first->getType());
> - const uint32_t dstElemNum = dstType->getNumElements();
> - const uint32_t srcElemNum = srcType->getNumElements();
> - for (uint32_t elemID = 0; elemID < dstElemNum; ++elemID) {
> - uint32_t srcID = I.getMaskValue(elemID);
> - Value *src = first;
> - if (srcID >= srcElemNum) {
> - srcID -= srcElemNum;
> - src = second;
> - }
> - regTranslator.newValueProxy(src, &I, srcID, elemID);
> - }
> - }
> + void GenWriter::regAllocateExtractElement(ExtractElementInst &I) {}
> + void GenWriter::emitExtractElement(ExtractElementInst &I) {}
>
> + void GenWriter::regAllocateShuffleVectorInst(ShuffleVectorInst &I) {}
> void GenWriter::emitShuffleVectorInst(ShuffleVectorInst &I) {}
>
> void GenWriter::regAllocateSelectInst(SelectInst &I) {
> diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp
> index c270924..2ad879e 100644
> --- a/backend/src/llvm/llvm_gen_backend.hpp
> +++ b/backend/src/llvm/llvm_gen_backend.hpp
> @@ -1,4 +1,4 @@
> -/*
> +/*
> * Copyright © 2012 Intel Corporation
> *
> * This library is free software; you can redistribute it and/or
> @@ -28,6 +28,9 @@
>
> #include "llvm/Pass.h"
> #include "sys/platform.hpp"
> +#include "sys/map.hpp"
> +#include "sys/hash_map.hpp"
> +#include <algorithm>
>
> // LLVM Type
> namespace llvm { class Type; }
> @@ -37,6 +40,29 @@ namespace gbe
> // Final target of the Gen backend
> namespace ir { class Unit; }
>
> + /*! All intrinsic Gen functions */
> + enum OCLInstrinsic {
> +#define DECL_LLVM_GEN_FUNCTION(ID, NAME) GEN_OCL_##ID,
> +#include "llvm_gen_ocl_function.hxx"
> +#undef DECL_LLVM_GEN_FUNCTION
> + };
> +
> + /*! Build the hash map for OCL functions on Gen */
> + struct OCLIntrinsicMap {
> + /*! Build the intrinsic hash map */
> + OCLIntrinsicMap(void) {
> +#define DECL_LLVM_GEN_FUNCTION(ID, NAME) \
> + map.insert(std::make_pair(#NAME, GEN_OCL_##ID));
> +#include "llvm_gen_ocl_function.hxx"
> +#undef DECL_LLVM_GEN_FUNCTION
> + }
> + /*! Sort intrinsics with their names */
> + hash_map<std::string, OCLInstrinsic> map;
> + };
> +
> + /*! Sort the OCL Gen instrinsic functions (built on pre-main) */
> + static const OCLIntrinsicMap instrinsicMap;
> +
> /*! Pad the offset */
> uint32_t getPadding(uint32_t offset, uint32_t align);
>
> @@ -55,6 +81,8 @@ namespace gbe
> /*! Remove the GEP instructions */
> llvm::BasicBlockPass *createRemoveGEPPass(const ir::Unit &unit);
>
> + llvm::FunctionPass* createScalarizePass(ir::Unit &unit);
> +
> } /* namespace gbe */
>
> #endif /* __GBE_LLVM_GEN_BACKEND_HPP__ */
> diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
> new file mode 100644
> index 0000000..453a23a
> --- /dev/null
> +++ b/backend/src/llvm/llvm_scalarize.cpp
> @@ -0,0 +1,801 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> + *
> + * Author: Benjamin Segovia <benjamin.segovia at intel.com>
> + * Heldge RHodin <alice.rhodin at alice-dsl.net>
> + */
> +
> +/**
> + * \file llvm_passes.cpp
~~~~~~~~~~~~~~~ Wrong file name — this should be llvm_scalarize.cpp.
> + * \author Benjamin Segovia <benjamin.segovia at intel.com>
> + * \author Heldge RHodin <alice.rhodin at alice-dsl.net>
> + */
The author information above seems incorrect. As we discussed over IM,
I checked this link: https://code.google.com/p/lunarglass/source/browse/trunk/Core/Passes/Transforms/Scalarize.cpp?r=605
and found the following copyright statement. I suggest you keep that statement as-is and add the Intel statement on top of it.
//===- Scalarize.cpp - Scalarize LunarGLASS IR ----------------------------===//
//
// LunarGLASS: An Open Modular Shader Compiler Architecture
// Copyright (C) 2010-2011 LunarG, Inc.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; version 2 of the
// License.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
// 02110-1301, USA.
//
//===----------------------------------------------------------------------===//
//
// Author: Michael Ilseman, LunarG
//
//===----------------------------------------------------------------------===//
//
// Scalarize the IR.
// * Loads of uniforms become multiple loadComponent calls
//
// * Reads/writes become read/writeComponent calls
//
// * Component-wise operations become multiple ops over each component
//
// * Texture call become recomponsed texture calls
//
// * Vector ops disappear, with their users referring to the scalarized
// * components
//
//===----------------------------------------------------------------------===//
> +
> +/* THIS CODE IS DERIVED FROM GPL LLVM PTX BACKEND. CODE IS HERE:
> + * http://sourceforge.net/scm/?type=git&group_id=319085
> + * Note that however, the original author, Heldge Rhodin, granted me (Benjamin
> + * Segovia) the right to use another license for it (MIT here)
> + */
> +
> +#include "llvm/ADT/DenseMap.h"
> +#include "llvm/ADT/PostOrderIterator.h"
> +#include "llvm/Function.h"
> +#include "llvm/InstrTypes.h"
> +#include "llvm/Instructions.h"
> +#include "llvm/IntrinsicInst.h"
> +#include "llvm/Module.h"
> +#include "llvm/Pass.h"
> +#include "llvm/IRBuilder.h"
> +#include "llvm/Support/CallSite.h"
> +#include "llvm/Support/CFG.h"
> +#include "llvm/Support/raw_ostream.h"
> +
> +#include "llvm/llvm_gen_backend.hpp"
> +#include "ir/unit.hpp"
> +#include "sys/map.hpp"
> +
> +
> +using namespace llvm;
> +
> +namespace gbe {
> +
> + struct VectorValues {
> + VectorValues() : vals()
> + { }
> +
> + void setComponent(int c, llvm::Value* val)
> + {
> + assert(c >= 0 && c < 16 && "Out of bounds component");
> + vals[c] = val;
> + }
> + llvm::Value* getComponent(int c)
> + {
> + assert(c >= 0 && c < 16 && "Out of bounds component");
> + assert(vals[c] && "Requesting non-existing component");
> + return vals[c];
> + }
> +
> + // {Value* x, Value* y, Value* z, Value* w}
> + llvm::Value* vals[16];
> + };
> +
> + class Scalarize : public FunctionPass {
> +
> + public:
> + // Standard pass stuff
> + static char ID;
> +
> + Scalarize(ir::Unit& unit) : FunctionPass(ID), unit(unit)
> + {
> + initializeLoopInfoPass(*PassRegistry::getPassRegistry());
> + initializeDominatorTreePass(*PassRegistry::getPassRegistry());
> + }
> +
> + virtual bool runOnFunction(Function&);
> + void print(raw_ostream&, const Module* = 0) const;
> + virtual void getAnalysisUsage(AnalysisUsage&) const;
> +
> + protected:
> + // An instruction is valid post-scalarization iff it is fully scalar or it
> + // is a gla_loadn
> + bool isValid(const Instruction*);
> +
> + // Take an instruction that produces a vector, and scalarize it
> + bool scalarize(Instruction*);
> + bool scalarizePerComponent(Instruction*);
> + bool scalarizeFuncCall(CallInst *);
> + bool scalarizeLoad(LoadInst*);
> + bool scalarizeStore(StoreInst*);
> + //bool scalarizeIntrinsic(IntrinsicInst*);
> + bool scalarizeExtract(ExtractElementInst*);
> + bool scalarizeInsert(InsertElementInst*);
> + bool scalarizeShuffleVector(ShuffleVectorInst*);
> + bool scalarizePHI(PHINode*);
> + void scalarizeArgs(Function& F);
> + // ...
> +
> + // Helpers to make the actual multiple scalar calls, one per
> + // component. Updates the given VectorValues's components with the new
> + // Values.
> + void makeScalarizedCalls(Function*, ArrayRef<Value*>, int numComponents, VectorValues&);
> +
> + void makePerComponentScalarizedCalls(Instruction*, ArrayRef<Value*>);
> +
> + // Makes a scalar form of the given instruction: replaces the operands
> + // and chooses a correct return type
> + Instruction* createScalarInstruction(Instruction* inst, ArrayRef<Value*>);
> +
> + // Gather the specified components in the given values. Returns the
> + // component if the given value is a vector, or the scalar itself.
> + void gatherComponents(int component, ArrayRef<Value*> args, SmallVectorImpl<Value*>& componentArgs);
> +
> + // Get the assigned component for that value. If the value is a scalar,
> + // returns the scalar. If it's a constant, returns that component. If
> + // it's an instruction, returns the vectorValues of that instruction for
> + // that component
> + Value* getComponent(int component, Value*);
> +
> + // Used for assertion purposes. Whether we can get the component out with
> + // a getComponent call
> + bool canGetComponent(Value*);
> +
> + // Used for assertion purposes. Whether for every operand we can get
> + // components with a getComponent call
> + bool canGetComponentArgs(User*);
> +
> + // Delete the instruction in the deadList
> + void dce();
> +
> +
> + int GetConstantInt(const Value* value);
> + bool IsPerComponentOp(const Instruction* inst);
> + bool IsPerComponentOp(const Value* value);
> +
> + //these function used to add extract and insert instructions when load/store etc.
> + void extractFromeVector(Value* insn);
~~~~~~~~~~~~~~~~~~ Should this be extractFromVector? There is a stray 'e' between "From" and "Vector", which looks like a typo — right?
> + Value* InsertToVector(Value* insn, Value* vecValue);
> +
> + Type* GetBasicType(Value* value) {
> + return GetBasicType(value->getType());
> + }
> +
> + Type* GetBasicType(Type* type) {
> + switch(type->getTypeID()) {
> + case Type::VectorTyID:
> + case Type::ArrayTyID:
> + return GetBasicType(type->getContainedType(0));
> + default:
> + break;
> + }
> + return type;
> + }
> +
> + int GetComponentCount(const Type* type) {
> + if (type->getTypeID() == Type::VectorTyID)
> + return llvm::dyn_cast<VectorType>(type)->getNumElements();
> + else
> + return 1;
> + }
> +
> + int GetComponentCount(const Value* value) {
> + return GetComponentCount(value->getType());
> + }
> +
> + DenseMap<Value*, VectorValues> vectorVals;
> + Module* module;
> + IRBuilder<>* builder;
> +
> + Type* intTy;
> + Type* floatTy;
> + ir::Unit &unit;
> +
> + std::vector<Instruction*> deadList;
> +
> + // List of vector phis that were not completely scalarized because some
> + // of their operands hadn't before been visited (i.e. loop variant
> + // variables)
> + SmallVector<PHINode*, 16> incompletePhis;
> + };
> +
> + Value* Scalarize::getComponent(int component, Value* v)
> + {
> + assert(canGetComponent(v) && "getComponent called on unhandled vector");
> +
> + if (v->getType()->isVectorTy()) {
> + if (ConstantDataVector* c = dyn_cast<ConstantDataVector>(v)) {
> + return c->getElementAsConstant(component);
> + } else if (ConstantVector* c = dyn_cast<ConstantVector>(v)) {
> + return c->getOperand(component);
> + } else if (isa<ConstantAggregateZero>(v)) {
> + return Constant::getNullValue(GetBasicType(v));
> + } else if (isa<UndefValue>(v)) {
> + return UndefValue::get(GetBasicType(v));
> + } else {
> + return vectorVals[v].getComponent(component);
> + }
> + } else {
> + return v;
> + }
> + }
> +
> + bool IsPerComponentOp(const llvm::Value* value)
> + {
> + const llvm::Instruction* inst = llvm::dyn_cast<const llvm::Instruction>(value);
> + return inst && IsPerComponentOp(inst);
> + }
> +
> + bool Scalarize::IsPerComponentOp(const Instruction* inst)
> + {
> + //if (const IntrinsicInst* intr = dyn_cast<const IntrinsicInst>(inst))
> + // return IsPerComponentOp(intr);
> +
> + if (inst->isTerminator())
> + return false;
> +
> + switch (inst->getOpcode()) {
> +
> + // Cast ops are only per-component if they cast back to the same vector
> + // width
> + case Instruction::Trunc:
> + case Instruction::ZExt:
> + case Instruction::SExt:
> + case Instruction::FPToUI:
> + case Instruction::FPToSI:
> + case Instruction::UIToFP:
> + case Instruction::SIToFP:
> + case Instruction::FPTrunc:
> + case Instruction::FPExt:
> + case Instruction::PtrToInt:
> + case Instruction::IntToPtr:
> + case Instruction::BitCast:
> + return GetComponentCount(inst->getOperand(0)) == GetComponentCount(inst);
> +
> + // Vector ops
> + case Instruction::InsertElement:
> + case Instruction::ExtractElement:
> + case Instruction::ShuffleVector:
> +
> + // Ways of accessing/loading/storing vectors
> + case Instruction::ExtractValue:
> + case Instruction::InsertValue:
> +
> + // Memory ops
> + case Instruction::Alloca:
> + case Instruction::Load:
> + case Instruction::Store:
> + case Instruction::GetElementPtr:
> + // Phis are a little special. We consider them not to be per-component
> + // because the mechanism of choice is a single value (what path we took to
> + // get here), and doesn't choose per-component (as select would). The caller
> + // should know to handle phis specially
> + case Instruction::PHI:
> + // Call insts, conservatively are no per-component
> + case Instruction::Call:
> + // Misc
> + case Instruction::LandingPad: //--- 3.0
> + case Instruction::VAArg:
> + return false;
> + } // end of switch (inst->getOpcode())
> +
> + return true;
> + }
> + int Scalarize::GetConstantInt(const Value* value)
> + {
> + const ConstantInt *constantInt = dyn_cast<ConstantInt>(value);
> +
> + // this might still be a constant expression, rather than a numeric constant,
> + // e.g., expression with undef's in it, so it was not folded
> + if (! constantInt)
> + NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("non-simple constant");
> +
> + return constantInt->getValue().getSExtValue();
> + }
> + bool Scalarize::canGetComponent(Value* v)
> + {
> + if (v->getType()->isVectorTy()) {
> + if (isa<ConstantDataVector>(v) || isa<ConstantVector>(v) || isa<ConstantAggregateZero>(v) || isa<UndefValue>(v)) {
> + return true;
> + } else {
> + assert((isa<Instruction>(v) || isa<Argument>(v)) && "Non-constant non-instuction?");
> + return vectorVals.count(v);
> + }
> + } else {
> + return true;
> + }
> + }
> +
> + bool Scalarize::canGetComponentArgs(User* u)
> + {
> + if (PHINode* phi = dyn_cast<PHINode>(u)) {
> + for (unsigned int i = 0; i < phi->getNumIncomingValues(); ++i)
> + if (!canGetComponent(phi->getIncomingValue(i)))
> + return false;
> + } else {
> + for (User::op_iterator i = u->op_begin(), e = u->op_end(); i != e; ++i)
> + if (!canGetComponent(*i))
> + return false;
> + }
> + return true;
> + }
> +
> + void Scalarize::gatherComponents(int component, ArrayRef<Value*> args, SmallVectorImpl<Value*>& componentArgs)
> + {
> + componentArgs.clear();
> + for (ArrayRef<Value*>::iterator i = args.begin(), e = args.end(); i != e; ++i)
> + componentArgs.push_back(getComponent(component, *i));
> + }
> +
> + Instruction* Scalarize::createScalarInstruction(Instruction* inst, ArrayRef<Value*> args)
> + {
> + // TODO: Refine the below into one large switch
> +
> + unsigned op = inst->getOpcode();
> + if (inst->isCast()) {
> + assert(args.size() == 1 && "incorrect number of arguments for cast op");
> + return CastInst::Create((Instruction::CastOps)op, args[0], GetBasicType(inst));
> + }
> +
> + if (inst->isBinaryOp()) {
> + assert(args.size() == 2 && "incorrect number of arguments for binary op");
> + return BinaryOperator::Create((Instruction::BinaryOps)op, args[0], args[1]);
> + }
> +
> + if (PHINode* phi = dyn_cast<PHINode>(inst)) {
> + PHINode* res = PHINode::Create(GetBasicType(inst), phi->getNumIncomingValues());
> + assert(args.size() % 2 == 0 && "Odd number of arguments for a PHI");
> +
> + // Loop over pairs of operands: [Value*, BasicBlock*]
> + for (unsigned int i = 0; i < args.size(); i++) {
> + BasicBlock* bb = phi->getIncomingBlock(i); //dyn_cast<BasicBlock>(args[i+1]);
> + //assert(bb && "Non-basic block incoming block?");
> + res->addIncoming(args[i], bb);
> + }
> +
> + return res;
> + }
> +
> + if (CmpInst* cmpInst = dyn_cast<CmpInst>(inst)) {
> + assert(args.size() == 2 && "incorrect number of arguments for comparison");
> + return CmpInst::Create(cmpInst->getOpcode(), cmpInst->getPredicate(), args[0], args[1]);
> + }
> +
> + if (isa<SelectInst>(inst)) {
> + assert(args.size() == 3 && "incorrect number of arguments for select");
> + return SelectInst::Create(args[0], args[1], args[2]);
> + }
> +
> + if (IntrinsicInst* intr = dyn_cast<IntrinsicInst>(inst)) {
> + if (! IsPerComponentOp(inst))
> + NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Scalarize instruction on a non-per-component intrinsic");
> +
> + // TODO: Assumption is that all per-component intrinsics have all their
> + // arguments be overloadable. Need to find some way to assert on this
> + // assumption. This is due to how getDeclaration operates; it only takes
> + // a list of types that fit overloadable slots.
> + SmallVector<Type*, 8> tys(1, GetBasicType(inst->getType()));
> + // Call instructions have the decl as a last argument, so skip it
> + for (ArrayRef<Value*>::iterator i = args.begin(), e = args.end() - 1; i != e; ++i) {
> + tys.push_back(GetBasicType((*i)->getType()));
> + }
> +
> + Function* f = Intrinsic::getDeclaration(module, intr->getIntrinsicID(), tys);
> + return CallInst::Create(f, args);
> + }
> +
> + NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Currently unsupported instruction: ", inst->getOpcode(),
> + // inst->getOpcodeName());
> + return 0;
> +
> + }
> +
> +
> + void Scalarize::makeScalarizedCalls(Function* f, ArrayRef<Value*> args, int count, VectorValues& vVals)
> + {
> + assert(count > 0 && count <= 16 && "invalid number of vector components");
> + for (int i = 0; i < count; ++i) {
> + Value* res;
> + SmallVector<Value*, 8> callArgs(args.begin(), args.end());
> + callArgs.push_back(ConstantInt::get(intTy, i));
> +
> + res = builder->CreateCall(f, callArgs);
> + vVals.setComponent(i, res);
> + }
> + }
> +
void Scalarize::makePerComponentScalarizedCalls(Instruction* inst, ArrayRef<Value*> args)
{
  // Emit one scalar clone of 'inst' per vector component, insert each at
  // the builder's current position, and record the clones in vectorVals.
  // 'args' may be empty only for deferred (incomplete) PHIs, whose
  // operands are filled in later by runOnFunction.
  int count = GetComponentCount(inst);
  assert(count > 0 && count <= 16 && "invalid number of vector components");
  assert((inst->getNumOperands() == args.size() || isa<PHINode>(inst))
         && "not enough arguments passed for instruction");

  // NOTE(review): this reference is taken before gatherComponents /
  // getComponent may insert new vectorVals entries; if vectorVals is a
  // rehashing container the reference could be invalidated — verify the
  // map type guarantees reference stability.
  VectorValues& vVals = vectorVals[inst];

  for (int i = 0; i < count; ++i) {
    // Set this component of each arg
    SmallVector<Value*, 8> callArgs(args.size(), 0);
    gatherComponents(i, args, callArgs);

    Instruction* res = createScalarInstruction(inst, callArgs);

    vVals.setComponent(i, res);
    builder->Insert(res);
  }
}
> +
> + bool Scalarize::isValid(const Instruction* inst)
> + {
> + // The result
> + if (inst->getType()->isVectorTy())
> + return false;
> +
> + // The arguments
> + for (Instruction::const_op_iterator i = inst->op_begin(), e = inst->op_end(); i != e; ++i) {
> + const Value* v = (*i);
> + assert(v);
> + if (v->getType()->isVectorTy())
> + return false;
> + }
> +
> + return true;
> + }
> +
bool Scalarize::scalarize(Instruction* inst)
{
  // Dispatch one instruction to the matching scalarization routine.
  // Returns true iff 'inst' was fully replaced and may be pushed onto the
  // dead list by the caller.
  if (isValid(inst))
    return false; // purely scalar already — nothing to do

  assert(! vectorVals.count(inst) && "We've already scalarized this somehow?");
  assert((canGetComponentArgs(inst) || isa<PHINode>(inst)) &&
         "Scalarizing an op whose arguments haven't been scalarized ");
  builder->SetInsertPoint(inst);

  // Order matters: the per-component test runs first, so only ops it
  // rejects (loads, stores, calls, shuffles, phis, ...) reach the
  // type-specific checks below.
  if (IsPerComponentOp(inst))
    return scalarizePerComponent(inst);

  if (LoadInst* ld = dyn_cast<LoadInst>(inst))
    return scalarizeLoad(ld);

  if (CallInst* call = dyn_cast<CallInst>(inst))
    return scalarizeFuncCall(call);

  if (ExtractElementInst* extr = dyn_cast<ExtractElementInst>(inst))
    return scalarizeExtract(extr);

  if (InsertElementInst* ins = dyn_cast<InsertElementInst>(inst))
    return scalarizeInsert(ins);

  if (ShuffleVectorInst* sv = dyn_cast<ShuffleVectorInst>(inst))
    return scalarizeShuffleVector(sv);

  if (PHINode* phi = dyn_cast<PHINode>(inst))
    return scalarizePHI(phi);

  if (isa<ExtractValueInst>(inst) || isa<InsertValueInst>(inst))
    // TODO: need to come up with a struct/array model for scalarization
    NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Scalarizing struct/array ops");

  if (StoreInst* st = dyn_cast<StoreInst>(inst))
    return scalarizeStore(st);

  NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Currently unhandled instruction ", inst->getOpcode(), inst->getOpcodeName());
  return false;
}
> +
bool Scalarize::scalarizeShuffleVector(ShuffleVectorInst* sv)
{
  // %res = shuffleVector <n x ty> %foo, <n x ty> bar, <n x i32> <...>
  // ==> nothing (just make a new VectorValues with the new components)
  //
  // No code is emitted: each result component is wired directly to the
  // already-scalarized component of the selected source operand.
  VectorValues& vVals = vectorVals[sv];

  int size = GetComponentCount(sv);
  // Mask indices in [0, srcSize) pick from operand 0; indices in
  // [srcSize, 2*srcSize) pick from operand 1.
  int srcSize = GetComponentCount(sv->getOperand(0)->getType());

  for (int i = 0; i < size; ++i) {
    int select = sv->getMaskValue(i);

    // A negative mask value means "undef" for this lane.
    if (select < 0) {
      vVals.setComponent(i, UndefValue::get(GetBasicType(sv->getOperand(0))));
      continue;
    }

    // Otherwise look up the corresponding component from the correct
    // source.
    Value* selectee;
    if (select < srcSize) {
      selectee = sv->getOperand(0);
    } else {
      // Choose from the second operand
      select -= srcSize;
      selectee = sv->getOperand(1);
    }

    vVals.setComponent(i, getComponent(select, selectee));
  }

  return true;
}
> +
> + bool Scalarize::scalarizePerComponent(Instruction* inst)
> + {
> + // dst = op <n x ty> %foo, <n x ty> %bar
> + // ==> dstx = op ty %foox, ty %barx
> + // dsty = op ty %fooy, ty %bary
> + // ...
> +
> + SmallVector<Value*, 16> args(inst->op_begin(), inst->op_end());
> +
> + makePerComponentScalarizedCalls(inst, args);
> +
> + return true;
> + }
> +
bool Scalarize::scalarizePHI(PHINode* phi)
{
  // dst = phi <n x ty> [ %foo, %bb1 ], [ %bar, %bb2], ...
  // ==> dstx = phi ty [ %foox, %bb1 ], [ %barx, %bb2], ...
  //     dsty = phi ty [ %fooy, %bb1 ], [ %bary, %bb2], ...

  // If the scalar values are all known up-front, then just make the full
  // phinode now. If they are not yet known (phinode for a loop variant
  // variable), then defer the arguments until later: empty phis are
  // created here and their incoming edges are added in runOnFunction
  // after every incoming value has been scalarized.

  if (canGetComponentArgs(phi)) {
    SmallVector<Value*, 8> args(phi->op_begin(), phi->op_end());
    makePerComponentScalarizedCalls(phi, args);
  } else {
    makePerComponentScalarizedCalls(phi, ArrayRef<Value*>());
    incompletePhis.push_back(phi);
  }

  return true;
}
> +
  // TODO(review): name typo — should read "extractFromVector"; renaming
  // touches all call sites, so it is left for a follow-up patch.
  void Scalarize::extractFromeVector(Value* insn) {
  // Emit one fake extractelement per component of 'insn', register each as
  // that component's scalar value, and record a value proxy in the unit so
  // the backend can map the fake instruction back to the real vector.
  VectorValues& vVals = vectorVals[insn];

  for (int i = 0; i < GetComponentCount(insn); ++i) {
    Value *cv = ConstantInt::get(intTy, i);
    Value *EI = builder->CreateExtractElement(insn, cv);
    vVals.setComponent(i, EI);
    //unit.fakeInsnMap[EI] = insn;
    unit.newValueProxy(insn, EI, i, 0);
  }
}
> +
> + Value* Scalarize::InsertToVector(Value * insn, Value* vecValue) {
> + //VectorValues& vVals = vectorVals[writeValue];
> + //unit.vecValuesMap[call] = vectorVals[writeValue];
> +
> + //add fake insert instructions to avoid removed
> + Value *II = NULL;
> + for (int i = 0; i < GetComponentCount(vecValue); ++i) {
> + Value *vec = II ? II : UndefValue::get(vecValue->getType());
> + Value *cv = ConstantInt::get(intTy, i);
> + II = builder->CreateInsertElement(vec, getComponent(i, vecValue), cv);
> + //unit.vecValuesMap[insn].setComponent(i, getComponent(i, writeValue));
> + //unit.newValueProxy(getComponent(i, vecValue), vecValue, 0, i);
> + //unit.fakeInsnMap[II] = insn;
> + }
> +
> + for (int i = 0; i < GetComponentCount(vecValue); ++i) {
> + unit.newValueProxy(getComponent(i, vecValue), II, 0, i);
> + }
> + return II;
> + }
> +
bool Scalarize::scalarizeFuncCall(CallInst* call) {
  // Handle calls that produce or consume vectors. Only the OCL image
  // read/write builtins are treated: reads get fake extractelements for
  // their vector result; writes get their vector argument rebuilt through
  // fake insertelements. All other calls pass through untouched.
  // Always returns false: the call itself is never deleted.
  if (Function *F = call->getCalledFunction()) {
    if (F->getIntrinsicID() != 0) { //Intrinsic functions
      NOT_IMPLEMENTED;
    } else {
      Value *Callee = call->getCalledValue();
      const std::string fnName = Callee->getName();
      auto it = instrinsicMap.map.find(fnName);
      GBE_ASSERT(it != instrinsicMap.map.end());

      // Get the function arguments
      CallSite CS(call);
      // NOTE(review): assumes the vector payload of the write builtins is
      // the 4th argument — confirm against the builtin declarations.
      CallSite::arg_iterator CI = CS.arg_begin() + 3;

      switch (it->second) {
      default: break;
      case GEN_OCL_READ_IMAGE0:
      case GEN_OCL_READ_IMAGE1:
      case GEN_OCL_READ_IMAGE2:
      case GEN_OCL_READ_IMAGE3:
      case GEN_OCL_READ_IMAGE4:
      case GEN_OCL_READ_IMAGE5:
      case GEN_OCL_READ_IMAGE10:
      case GEN_OCL_READ_IMAGE11:
      case GEN_OCL_READ_IMAGE12:
      case GEN_OCL_READ_IMAGE13:
      case GEN_OCL_READ_IMAGE14:
      case GEN_OCL_READ_IMAGE15:
      {
        extractFromeVector(call);
        break;
      }
      case GEN_OCL_WRITE_IMAGE10:
      case GEN_OCL_WRITE_IMAGE11:
      case GEN_OCL_WRITE_IMAGE12:
      case GEN_OCL_WRITE_IMAGE13:
      case GEN_OCL_WRITE_IMAGE14:
      case GEN_OCL_WRITE_IMAGE15:
        // These variants carry one extra leading argument, so step past it
        // before the shared handling below.
        CI++;
        /* intentional fall through */
      case GEN_OCL_WRITE_IMAGE0:
      case GEN_OCL_WRITE_IMAGE1:
      case GEN_OCL_WRITE_IMAGE2:
      case GEN_OCL_WRITE_IMAGE3:
      case GEN_OCL_WRITE_IMAGE4:
      case GEN_OCL_WRITE_IMAGE5:
      {
        *CI = InsertToVector(call, *CI);
        break;
      }
      }
    }
  }
  return false;
}
> +
bool Scalarize::scalarizeLoad(LoadInst* ld)
{
  // Keep the vector load intact: emit fake extractelements so each
  // component becomes individually addressable by scalarized users.
  // Returns false so the load is never placed on the dead list.
  extractFromeVector(ld);
  return false;
}
> +
bool Scalarize::scalarizeStore(StoreInst* st) {
  // Keep the vector store intact, but replace the stored value with a
  // freshly-built chain of fake insertelements so the scalar components
  // stay live. Returns false: the store itself is never deleted.
  st->setOperand(0, InsertToVector(st, st->getValueOperand()));
  return false;
}
> +
> + bool Scalarize::scalarizeExtract(ExtractElementInst* extr)
> + {
> + // %res = extractelement <n X ty> %foo, %i
> + // ==> nothing (just use %foo's %ith component instead of %res)
> +
> + if (! isa<Constant>(extr->getOperand(1))) {
> + // TODO: Variably referenced components. Probably handle/emulate through
> + // a series of selects.
> + NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Variably referenced vector components");
> + }
> + //if (isa<Argument>(extr->getOperand(0)))
> + // return false;
> + int component = GetConstantInt(extr->getOperand(1));
> + Value* v = getComponent(component, extr->getOperand(0));
> + if(extr == v)
> + return false;
> + extr->replaceAllUsesWith(v);
> +
> + return true;
> + }
> +
bool Scalarize::scalarizeInsert(InsertElementInst* ins)
{
  // %res = insertelement <n x ty> %foo, ty %elt, i32 %i
  // ==> nothing (just make a new VectorValues with the new component)

  if (! isa<Constant>(ins->getOperand(2))) {
    // TODO: Variably referenced components. Probably handle/emulate through
    // a series of selects.
    NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Variably referenced vector components");
  }

  int component = GetConstantInt(ins->getOperand(2));

  // Component i of the result is the inserted element where i matches the
  // constant index, otherwise it is inherited from the source vector.
  VectorValues& vVals = vectorVals[ins];
  for (int i = 0; i < GetComponentCount(ins); ++i) {
    vVals.setComponent(i, i == component ? ins->getOperand(1)
                       : getComponent(i, ins->getOperand(0)));
  }

  return true;
}
> +
> + void Scalarize::scalarizeArgs(Function& F) {
> + if (F.arg_empty())
> + return;
> + ReversePostOrderTraversal<Function*> rpot(&F);
> + BasicBlock::iterator instI = (*rpot.begin())->begin();
> + builder->SetInsertPoint(instI);
> +
> + Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
> +
> +#if LLVM_VERSION_MINOR <= 1
> + const AttrListPtr &PAL = F.getAttributes();
> + uint32_t argID = 1; // Start at one actually
> + for (; I != E; ++I, ++argID) {
> +#else
> + for (; I != E; ++I) {
> +#endif /* LLVM_VERSION_MINOR <= 1 */
> + Type *type = I->getType();
> +
> + if(type->isVectorTy())
> + extractFromeVector(I);
> + }
> + return;
> + }
> +
bool Scalarize::runOnFunction(Function& F)
{
  // Pass entry point: scalarize every instruction of a kernel in reverse
  // post-order, then patch up deferred phi operands and delete the
  // replaced vector instructions. Returns true if the IR changed.
  switch (F.getCallingConv()) {
  case CallingConv::PTX_Device:
    // Non-kernel device functions are left untouched.
    return false;
  case CallingConv::PTX_Kernel:
    break;
  default: GBE_ASSERTM(false, "Unsupported calling convention");
  }

  bool changed = false;
  module = F.getParent();
  intTy = IntegerType::get(module->getContext(), 32);
  floatTy = Type::getFloatTy(module->getContext());
  // NOTE(review): raw new/delete — 'builder' leaks if anything below
  // throws; a stack-allocated IRBuilder would be safer.
  builder = new IRBuilder<>(module->getContext());

  scalarizeArgs(F);

  // RPO visits defs before uses (except across loop back-edges), so most
  // operands are already scalarized when their users are reached; the
  // exceptions become "incomplete phis" handled below.
  typedef ReversePostOrderTraversal<Function*> RPOTType;
  RPOTType rpot(&F);
  for (RPOTType::rpo_iterator bbI = rpot.begin(), bbE = rpot.end(); bbI != bbE; ++bbI) {
    for (BasicBlock::iterator instI = (*bbI)->begin(), instE = (*bbI)->end(); instI != instE; ++instI) {
      bool scalarized = scalarize(instI);
      if (scalarized) {
        changed = true;
        // TODO: uncomment when done
        deadList.push_back(instI);
      }
    }
  }

  // Fill in the incomplete phis (loop-carried values whose operands were
  // not yet scalarized when the phi itself was visited)
  for (SmallVectorImpl<PHINode*>::iterator phiI = incompletePhis.begin(), phiE = incompletePhis.end();
       phiI != phiE; ++phiI) {
    assert(canGetComponentArgs(*phiI) && "Phi's operands never scalarized");

    // Fill in each component of this phi
    VectorValues& vVals = vectorVals[*phiI];
    for (int c = 0; c < GetComponentCount(*phiI); ++c) {
      PHINode* compPhi = dyn_cast<PHINode>(vVals.getComponent(c));
      assert(compPhi && "Vector phi got scalarized to non-phis?");

      // For each incoming edge, wire the c-th component of the incoming
      // value to the corresponding incoming block.
      for (unsigned int i = 0; i < (*phiI)->getNumOperands(); i++) {
        BasicBlock* bb = (*phiI)->getIncomingBlock(i);
        assert(bb && "Non-basic block incoming block?");
        compPhi->addIncoming(getComponent(c, (*phiI)->getOperand(i)), bb);
      }
    }
  }

  dce();

  delete builder;
  builder = 0;

  return changed;
}
> +
void Scalarize::dce()
{
  //two passes delete for some phinode
  // Pass 1 (reverse order): drop all operand references first — this
  // breaks the use cycles phi nodes can form — then erase any instruction
  // that is already free of uses.
  for (std::vector<Instruction*>::reverse_iterator i = deadList.rbegin(), e = deadList.rend(); i != e; ++i) {
    (*i)->dropAllReferences();
    if((*i)->use_empty())
      (*i)->eraseFromParent();
  }
  // Pass 2: erase the survivors now that the cycles are broken; a non-null
  // parent identifies instructions not yet erased in pass 1.
  for (std::vector<Instruction*>::reverse_iterator i = deadList.rbegin(), e = deadList.rend(); i != e; ++i) {
    if((*i)->getParent())
      (*i)->eraseFromParent();
  }
  deadList.clear();
}
> +
void Scalarize::getAnalysisUsage(AnalysisUsage& AU) const
{
  // Intentionally empty: this pass requires no analyses and declares
  // nothing preserved (it rewrites instructions throughout the function).
}
> +
> + void Scalarize::print(raw_ostream&, const Module*) const
> + {
> + return;
> + }
// Factory used by the pass manager: creates a Scalarize pass bound to the
// given GBE IR unit (which collects the fake-instruction value proxies).
FunctionPass* createScalarizePass(ir::Unit &unit)
{
  return new Scalarize(unit);
}
char Scalarize::ID = 0; // Pass identification, replacement for typeid
> +
> +} // end namespace
> diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
> index ea3d9eb..559cde0 100644
> --- a/backend/src/llvm/llvm_to_gen.cpp
> +++ b/backend/src/llvm/llvm_to_gen.cpp
> @@ -69,6 +69,7 @@ namespace gbe
> // Print the code before further optimizations
> if (OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS)
> passes.add(createPrintModulePass(&*o));
> + passes.add(createScalarizePass(unit)); // Expand all vector ops
> passes.add(createScalarReplAggregatesPass()); // Break up allocas
> passes.add(createRemoveGEPPass(unit));
> passes.add(createConstantPropagationPass());
> --
> 1.7.9.5
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list