[Beignet] [PATCH V2 1/2] Add a scalarize llvm pass.

Zhigang Gong zhigang.gong at linux.intel.com
Fri May 17 00:44:32 PDT 2013


Pushed with minor modification. Just noticed that your commit log
is all in one very long line. Please split it into multiple lines
of reasonable length next time.

Thanks.

On Fri, May 17, 2013 at 03:11:51PM +0800, Yang Rong wrote:
> In previous implementation expand vector ops in GenWrite, it is hard to optimize.
> Now, I add new llvm pass to scalarize. This pass will expand all normal vector ops to scalar ops, except store/load, image read/write and function's argument. Add fake ExtractElement/InsertElement instructions to avoid dead instruction elimination, and unit valueMap hold the relationship between these fake instructions and real load/store instructions.
> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  backend/src/CMakeLists.txt            |    1 +
>  backend/src/ir/unit.hpp               |   22 +-
>  backend/src/llvm/llvm_gen_backend.cpp |  241 +++-------
>  backend/src/llvm/llvm_gen_backend.hpp |   30 +-
>  backend/src/llvm/llvm_scalarize.cpp   |  836 +++++++++++++++++++++++++++++++++
>  backend/src/llvm/llvm_to_gen.cpp      |    1 +
>  6 files changed, 949 insertions(+), 182 deletions(-)
>  create mode 100644 backend/src/llvm/llvm_scalarize.cpp
> 
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
> index 1829964..183517a 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -83,6 +83,7 @@ else (GBE_USE_BLOB)
>      backend/program.h
>      llvm/llvm_gen_backend.cpp
>      llvm/llvm_passes.cpp
> +    llvm/llvm_scalarize.cpp
>      llvm/llvm_to_gen.cpp
>      llvm/llvm_gen_backend.hpp
>      llvm/llvm_gen_ocl_function.hxx
> diff --git a/backend/src/ir/unit.hpp b/backend/src/ir/unit.hpp
> index ae78638..3b293f5 100644
> --- a/backend/src/ir/unit.hpp
> +++ b/backend/src/ir/unit.hpp
> @@ -1,4 +1,4 @@
> -/* 
> +/*
>   * Copyright © 2012 Intel Corporation
>   *
>   * This library is free software; you can redistribute it and/or
> @@ -24,9 +24,12 @@
>  #ifndef __GBE_IR_UNIT_HPP__
>  #define __GBE_IR_UNIT_HPP__
>  
> +#include "llvm/Value.h"
> +
>  #include "ir/constant.hpp"
>  #include "ir/register.hpp"
>  #include "sys/hash_map.hpp"
> +#include "sys/map.hpp"
>  
>  namespace gbe {
>  namespace ir {
> @@ -41,6 +44,7 @@ namespace ir {
>    {
>    public:
>      typedef hash_map<std::string, Function*> FunctionSet;
> +    typedef std::pair<llvm::Value*, uint32_t> ValueIndex;
>      /*! Create an empty unit */
>      Unit(PointerSize pointerSize = POINTER_32_BITS);
>      /*! Release everything (*including* the function pointers) */
> @@ -71,11 +75,27 @@ namespace ir {
>      ConstantSet& getConstantSet(void) { return constantSet; }
>      /*! Return the constant set */
>      const ConstantSet& getConstantSet(void) const { return constantSet; }
> +
> +    /*! Some values will not be allocated. For example a vector extract and
> +     * a vector insertion when scalarize the vector load/store
> +     */
> +    void newValueProxy(llvm::Value *real,
> +                       llvm::Value *fake,
> +                       uint32_t realIndex = 0u,
> +                       uint32_t fakeIndex = 0u) {
> +      const ValueIndex key(fake, fakeIndex);
> +      const ValueIndex value(real, realIndex);
> +      GBE_ASSERT(valueMap.find(key) == valueMap.end()); // Do not insert twice
> +      valueMap[key] = value;
> +    }
> +    /*! Return the value map */
> +    const map<ValueIndex, ValueIndex>& getValueMap(void) const { return valueMap; }
>    private:
>      friend class ContextInterface; //!< Can free modify the unit
>      hash_map<std::string, Function*> functions; //!< All the defined functions
>      ConstantSet constantSet; //!< All the constants defined in the unit
>      PointerSize pointerSize; //!< Size shared by all pointers
> +    map<ValueIndex, ValueIndex> valueMap; //!< fake to real value map for vector load/store
>      GBE_CLASS(Unit);
>    };
>  
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 8dcf15c..3855011 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -1,4 +1,4 @@
> -/* 
> +/*
>   * Copyright © 2012 Intel Corporation
>   *
>   * This library is free software; you can redistribute it and/or
> @@ -60,7 +60,7 @@
>   * dependencies on endianness or ABIs. Fortunately, the ptx (and nvptx for LLVM
>   * 3.2) profile is pretty well adapted to our needs since NV and Gen GPU are
>   * kind of similar, or at least they are similar enough to share the same front
> - * end. 
> + * end.
>   *
>   * Problems
>   * ========
> @@ -126,10 +126,8 @@
>  #include "ir/context.hpp"
>  #include "ir/unit.hpp"
>  #include "ir/liveness.hpp"
> -#include "sys/map.hpp"
>  #include "sys/set.hpp"
>  #include "sys/cvar.hpp"
> -#include <algorithm>
>  
>  /* Not defined for LLVM 3.0 */
>  #if !defined(LLVM_VERSION_MAJOR)
> @@ -207,7 +205,7 @@ namespace gbe
>    /*! Type to register family translation */
>    static ir::RegisterFamily getFamily(const ir::Context &ctx, const Type *type)
>    {
> -    GBE_ASSERT(isScalarType(type) == true); 
> +    GBE_ASSERT(isScalarType(type) == true);
>      if (type == Type::getInt1Ty(type->getContext()))
>        return ir::FAMILY_BOOL;
>      if (type == Type::getInt8Ty(type->getContext()))
> @@ -269,6 +267,8 @@ namespace gbe
>    class RegisterTranslator
>    {
>    public:
> +    /*! Indices will be zero for scalar values */
> +    typedef std::pair<Value*, uint32_t> ValueIndex;
>      RegisterTranslator(ir::Context &ctx) : ctx(ctx) {}
>  
>      /*! Empty the maps */
> @@ -289,6 +289,11 @@ namespace gbe
>        GBE_ASSERT(valueMap.find(key) == valueMap.end()); // Do not insert twice
>        valueMap[key] = value;
>      }
> +    /*! After scalarize pass, there are some valueMap in unit,
> +     *  use this function to copy from unit valueMap */
> +    void initValueMap(const map<ValueIndex, ValueIndex>& vMap) {
> +      valueMap.insert(vMap.begin(), vMap.end());
> +    }
>      /*! Mostly used for the preallocated registers (lids, gids) */
>      void newScalarProxy(ir::Register reg, Value *value, uint32_t index = 0u) {
>        const ValueIndex key(value, index);
> @@ -325,10 +330,9 @@ namespace gbe
>        };
>        return ir::Register();
>      }
> -    /*! Get the register from the given value at given index possibly iterating
> -     *  in the value map to get the final real register
> -     */
> -    ir::Register getScalar(Value *value, uint32_t index = 0u) {
> +
> +    /*! iterating in the value map to get the final real register */
> +    void getRealValue(Value* &value, uint32_t& index) {
>        auto end = valueMap.end();
>        for (;;) {
>          auto it = valueMap.find(std::make_pair(value, index));
> @@ -339,6 +343,14 @@ namespace gbe
>            index = it->second.second;
>          }
>        }
> +    }
> +
> +    /*! Get the register from the given value at given index possibly iterating
> +     *  in the value map to get the final real register
> +     */
> +    ir::Register getScalar(Value *value, uint32_t index = 0u) {
> +      getRealValue(value, index);
> +
>        const auto key = std::make_pair(value, index);
>        GBE_ASSERT(scalarMap.find(key) != scalarMap.end());
>        return scalarMap[key];
> @@ -351,16 +363,8 @@ namespace gbe
>      }
>      /*! Says if the value exists. Otherwise, it is undefined */
>      bool valueExists(Value *value, uint32_t index) {
> -      auto end = valueMap.end();
> -      for (;;) {
> -        auto it = valueMap.find(std::make_pair(value, index));
> -        if (it == end)
> -          break;
> -        else {
> -          value = it->second.first;
> -          index = it->second.second;
> -        }
> -      }
> +      getRealValue(value, index);
> +
>        const auto key = std::make_pair(value, index);
>        return scalarMap.find(key) != scalarMap.end();
>      }
> @@ -375,8 +379,6 @@ namespace gbe
>        this->insertRegister(reg, key, index);
>        return reg;
>      }
> -    /*! Indices will be zero for scalar values */
> -    typedef std::pair<Value*, uint32_t> ValueIndex;
>      /*! Map value to ir::Register */
>      map<ValueIndex, ir::Register> scalarMap;
>      /*! Map values to values when this is only a translation (eq bitcast) */
> @@ -384,28 +386,6 @@ namespace gbe
>      /*! Actually allocates the registers */
>      ir::Context &ctx;
>    };
> -  /*! All intrinsic Gen functions */
> -  enum OCLInstrinsic {
> -#define DECL_LLVM_GEN_FUNCTION(ID, NAME) GEN_OCL_##ID,
> -#include "llvm_gen_ocl_function.hxx"
> -#undef DECL_LLVM_GEN_FUNCTION
> -  };
> -
> -  /*! Build the hash map for OCL functions on Gen */
> -  struct OCLIntrinsicMap {
> -    /*! Build the intrinsic hash map */
> -    OCLIntrinsicMap(void) {
> -#define DECL_LLVM_GEN_FUNCTION(ID, NAME) \
> -  map.insert(std::make_pair(#NAME, GEN_OCL_##ID));
> -#include "llvm_gen_ocl_function.hxx"
> -#undef DECL_LLVM_GEN_FUNCTION
> -    }
> -    /*! Sort intrinsics with their names */
> -    hash_map<std::string, OCLInstrinsic> map;
> -  };
> -
> -  /*! Sort the OCL Gen instrinsic functions (built on pre-main) */
> -  static const OCLIntrinsicMap instrinsicMap;
>  
>    /*! Translate LLVM IR code to Gen IR code */
>    class GenWriter : public FunctionPass, public InstVisitor<GenWriter>
> @@ -423,7 +403,7 @@ namespace gbe
>       */
>      set<const Value*> conditionSet;
>      /*! We visit each function twice. Once to allocate the registers and once to
> -     *  emit the Gen IR instructions 
> +     *  emit the Gen IR instructions
>       */
>      enum Pass {
>        PASS_EMIT_REGISTERS = 0,
> @@ -663,7 +643,7 @@ namespace gbe
>      if (dyn_cast<ConstantAggregateZero>(CPV)) {
>        return doIt(uint32_t(0)); // XXX Handle type
>      } else {
> -      if (dyn_cast<ConstantVector>(CPV)) 
> +      if (dyn_cast<ConstantVector>(CPV))
>          CPV = extractConstantElem(CPV, index);
>        GBE_ASSERTM(dyn_cast<ConstantExpr>(CPV) == NULL, "Unsupported constant expression");
>  
> @@ -756,6 +736,9 @@ namespace gbe
>    }
>  
>    ir::Register GenWriter::getRegister(Value *value, uint32_t elemID) {
> +    //the real value may be constant, so get real value before constant check
> +    regTranslator.getRealValue(value, elemID);
> +
>      if (dyn_cast<ConstantExpr>(value)) {
>        ConstantExpr *ce = dyn_cast<ConstantExpr>(value);
>        if(ce->isCast()) {
> @@ -867,6 +850,7 @@ namespace gbe
>                  "Returned value for kernel functions is forbidden");
>      // Loop over the arguments and output registers for them
>      if (!F.arg_empty()) {
> +      uint32_t argID = 0;
>        Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
>  
>        // Insert a new register for each function argument
> @@ -875,10 +859,33 @@ namespace gbe
>        uint32_t argID = 1; // Start at one actually
>        for (; I != E; ++I, ++argID) {
>  #else
> -      for (; I != E; ++I) {
> +      for (; I != E; ++I, ++argID) {
>  #endif /* LLVM_VERSION_MINOR <= 1 */
>          const std::string &argName = I->getName().str();
>          Type *type = I->getType();
> +
> +        //add support for vector argument
> +        if(type->isVectorTy()) {
> +          VectorType *vectorType = cast<VectorType>(type);
> +
> +          this->newRegister(I);
> +          ir::Register reg = getRegister(I, 0);
> +
> +          Type *elemType = vectorType->getElementType();
> +          const uint32_t elemSize = getTypeByteSize(unit, elemType);
> +          const uint32_t elemNum = vectorType->getNumElements();
> +          //vector's elemType always scalar type
> +          ctx.input(argName, ir::FunctionArgument::VALUE, reg, elemNum*elemSize);
> +
> +          ir::Function& fn = ctx.getFunction();
> +          for(uint32_t i=1; i < elemNum; i++) {
> +            ir::PushLocation argLocation(fn, argID, elemSize*i);
> +            reg = getRegister(I, i);
> +            ctx.appendPushedConstant(reg, argLocation);  //add to push map for reg alloc
> +          }
> +          continue;
> +        }
> +
>          GBE_ASSERTM(isScalarType(type) == true,
>                      "vector type in the function argument is not supported yet");
>          const ir::Register reg = regTranslator.newScalar(I);
> @@ -916,7 +923,6 @@ namespace gbe
>                  ctx.input(argName, ir::FunctionArgument::IMAGE, reg, ptrSize);
>                  ctx.getFunction().getImageSet()->append(reg, &ctx);
>                break;
> -              break;
>                default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE);
>              }
>            }
> @@ -1141,6 +1147,7 @@ namespace gbe
>  
>      ctx.startFunction(F.getName());
>      this->regTranslator.clear();
> +    this->regTranslator.initValueMap(unit.getValueMap());
>      this->labelMap.clear();
>      this->emitFunctionPrototype(F);
>  
> @@ -1495,141 +1502,15 @@ namespace gbe
>      ir::Context &ctx;
>    };
>  
> -  void GenWriter::regAllocateInsertElement(InsertElementInst &I) {
> -    Value *modified = I.getOperand(0);
> -    Value *toInsert = I.getOperand(1);
> -    Value *index = I.getOperand(2);
> -
> -    // Get the index for the insertion
> -    Constant *CPV = dyn_cast<Constant>(index);
> -    GBE_ASSERTM(CPV != NULL, "only constant indices when inserting values");
> -    auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
> -    GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32,
> -                "Invalid index type for InsertElement");
> -
> -    // Crash on overrun
> -    VectorType *vectorType = cast<VectorType>(modified->getType());
> -    const uint32_t elemNum = vectorType->getNumElements();
> -    const uint32_t modifiedID = x.data.u32;
> -    GBE_ASSERTM(modifiedID < elemNum, "Out-of-bound index for InsertElement");
> -
> -    // The source vector is not constant
> -    if (!isa<Constant>(modified) || isa<UndefValue>(modified)) {
> -       // Non modified values are just proxies
> -       for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
> -         if (elemID != modifiedID)
> -           regTranslator.newValueProxy(modified, &I, elemID, elemID);
> -     }
> -     // The source vector is constant
> -     else {
> -       // Non modified values will use LOADI
> -       for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
> -         if (elemID != modifiedID) {
> -           const ir::Type type = getType(ctx, toInsert->getType());
> -           const ir::Register reg = ctx.reg(getFamily(type));
> -           regTranslator.insertRegister(reg, &I, elemID);
> -         }
> -     }
> -
> -     // If the element to insert is an immediate we will generate a LOADI.
> -     // Otherwise, the value is just a proxy of the inserted value
> -     if (dyn_cast<Constant>(toInsert) != NULL) {
> -       const ir::Type type = getType(ctx, toInsert->getType());
> -       const ir::Register reg = ctx.reg(getFamily(type));
> -       regTranslator.insertRegister(reg, &I, modifiedID);
> -     } else
> -       regTranslator.newValueProxy(toInsert, &I, 0, modifiedID);
> -  }
> -
> -  void GenWriter::emitInsertElement(InsertElementInst &I) {
> -    // Note that we check everything in regAllocateInsertElement
> -    Value *modified = I.getOperand(0);
> -    Value *toInsert = I.getOperand(1);
> -    Value *index = I.getOperand(2);
> -
> -    // Get the index of the value to insert
> -    Constant *indexCPV = dyn_cast<Constant>(index);
> -    auto x = processConstant<ir::Immediate>(indexCPV, InsertExtractFunctor(ctx));
> -    const uint32_t modifiedID = x.data.u32;
> -
> -    // The source vector is constant. We need to insert LOADI for the unmodified
> -    // values
> -    if (isa<Constant>(modified) && !isa<UndefValue>(modified)) {
> -      VectorType *vectorType = cast<VectorType>(modified->getType());
> -      const uint32_t elemNum = vectorType->getNumElements();
> -      for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
> -        if (elemID != modifiedID) {
> -          Constant *sourceCPV = dyn_cast<Constant>(modified);
> -          if (isa<UndefValue>(extractConstantElem(sourceCPV, elemID)) == false) {
> -            const ir::ImmediateIndex immIndex = this->newImmediate(sourceCPV, elemID);
> -            const ir::Immediate imm = ctx.getImmediate(immIndex);
> -            const ir::Register reg = regTranslator.getScalar(&I, elemID);
> -            ctx.LOADI(imm.type, reg, immIndex);
> -          }
> -        }
> -    }
> -
> -    // If the inserted value is not a constant, we just use a proxy
> -    if (dyn_cast<Constant>(toInsert) == NULL)
> -      return;
> -
> -    // We need a LOADI if we insert an immediate
> -    Constant *toInsertCPV = dyn_cast<Constant>(toInsert);
> -    const ir::ImmediateIndex immIndex = this->newImmediate(toInsertCPV);
> -    const ir::Immediate imm = ctx.getImmediate(immIndex);
> -    const ir::Register reg = regTranslator.getScalar(&I, modifiedID);
> -    ctx.LOADI(imm.type, reg, immIndex);
> -  }
> -
> -  void GenWriter::regAllocateExtractElement(ExtractElementInst &I) {
> -    Value *extracted = I.getOperand(0);
> -    Value *index = I.getOperand(1);
> -    GBE_ASSERTM(isa<Constant>(extracted) == false,
> -                "TODO support constant vector for extract");
> -    Constant *CPV = dyn_cast<Constant>(index);
> -    GBE_ASSERTM(CPV != NULL, "only constant indices when inserting values");
> -    auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
> -    GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32,
> -                "Invalid index type for InsertElement");
> -
> -    // Crash on overrun
> -    const uint32_t extractedID = x.data.u32;
> -#if GBE_DEBUG
> -    VectorType *vectorType = cast<VectorType>(extracted->getType());
> -    const uint32_t elemNum = vectorType->getNumElements();
> -    GBE_ASSERTM(extractedID < elemNum, "Out-of-bound index for InsertElement");
> -#endif /* GBE_DEBUG */
> -
> -    // Easy when the vector is not immediate
> -    regTranslator.newValueProxy(extracted, &I, extractedID, 0);
> -  }
> -
> -  void GenWriter::emitExtractElement(ExtractElementInst &I) {
> -    // TODO -> insert LOADI when the extracted vector is constant
> -  }
> +  /*! Because there are still fake insert/extract instruction for
> +   *  load/store, so keep empty function here */
> +  void GenWriter::regAllocateInsertElement(InsertElementInst &I) {}
> +  void GenWriter::emitInsertElement(InsertElementInst &I) {}
>  
> -  void GenWriter::regAllocateShuffleVectorInst(ShuffleVectorInst &I) {
> -    Value *first = I.getOperand(0);
> -    Value *second = I.getOperand(1);
> -    GBE_ASSERTM(!isa<Constant>(first) || isa<UndefValue>(first),
> -                "TODO support constant vector for shuffle");
> -    GBE_ASSERTM(!isa<Constant>(second) || isa<UndefValue>(second),
> -                "TODO support constant vector for shuffle");
> -    VectorType *dstType = cast<VectorType>(I.getType());
> -    VectorType *srcType = cast<VectorType>(first->getType());
> -    const uint32_t dstElemNum = dstType->getNumElements();
> -    const uint32_t srcElemNum = srcType->getNumElements();
> -    for (uint32_t elemID = 0; elemID < dstElemNum; ++elemID) {
> -      uint32_t srcID = I.getMaskValue(elemID);
> -      Value *src = first;
> -      if (srcID >= srcElemNum) {
> -        srcID -= srcElemNum;
> -        src = second;
> -      }
> -      regTranslator.newValueProxy(src, &I, srcID, elemID);
> -    }
> -  }
> +  void GenWriter::regAllocateExtractElement(ExtractElementInst &I) {}
> +  void GenWriter::emitExtractElement(ExtractElementInst &I) {}
>  
> +  void GenWriter::regAllocateShuffleVectorInst(ShuffleVectorInst &I) {}
>    void GenWriter::emitShuffleVectorInst(ShuffleVectorInst &I) {}
>  
>    void GenWriter::regAllocateSelectInst(SelectInst &I) {
> diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp
> index c270924..2ad879e 100644
> --- a/backend/src/llvm/llvm_gen_backend.hpp
> +++ b/backend/src/llvm/llvm_gen_backend.hpp
> @@ -1,4 +1,4 @@
> -/* 
> +/*
>   * Copyright © 2012 Intel Corporation
>   *
>   * This library is free software; you can redistribute it and/or
> @@ -28,6 +28,9 @@
>  
>  #include "llvm/Pass.h"
>  #include "sys/platform.hpp"
> +#include "sys/map.hpp"
> +#include "sys/hash_map.hpp"
> +#include <algorithm>
>  
>  // LLVM Type
>  namespace llvm { class Type; }
> @@ -37,6 +40,29 @@ namespace gbe
>    // Final target of the Gen backend
>    namespace ir { class Unit; }
>  
> +  /*! All intrinsic Gen functions */
> +  enum OCLInstrinsic {
> +#define DECL_LLVM_GEN_FUNCTION(ID, NAME) GEN_OCL_##ID,
> +#include "llvm_gen_ocl_function.hxx"
> +#undef DECL_LLVM_GEN_FUNCTION
> +  };
> +
> +  /*! Build the hash map for OCL functions on Gen */
> +  struct OCLIntrinsicMap {
> +    /*! Build the intrinsic hash map */
> +    OCLIntrinsicMap(void) {
> +#define DECL_LLVM_GEN_FUNCTION(ID, NAME) \
> +  map.insert(std::make_pair(#NAME, GEN_OCL_##ID));
> +#include "llvm_gen_ocl_function.hxx"
> +#undef DECL_LLVM_GEN_FUNCTION
> +    }
> +    /*! Sort intrinsics with their names */
> +    hash_map<std::string, OCLInstrinsic> map;
> +  };
> +
> +  /*! Sort the OCL Gen instrinsic functions (built on pre-main) */
> +  static const OCLIntrinsicMap instrinsicMap;
> +
>    /*! Pad the offset */
>    uint32_t getPadding(uint32_t offset, uint32_t align);
>  
> @@ -55,6 +81,8 @@ namespace gbe
>    /*! Remove the GEP instructions */
>    llvm::BasicBlockPass *createRemoveGEPPass(const ir::Unit &unit);
>  
> +  llvm::FunctionPass* createScalarizePass(ir::Unit &unit);
> +
>  } /* namespace gbe */
>  
>  #endif /* __GBE_LLVM_GEN_BACKEND_HPP__ */
> diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
> new file mode 100644
> index 0000000..411e723
> --- /dev/null
> +++ b/backend/src/llvm/llvm_scalarize.cpp
> @@ -0,0 +1,836 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +/**
> + * \file llvm_scalarize.cpp
> + */
> +
> +//===- Scalarize.cpp - Scalarize LunarGLASS IR ----------------------------===//
> +//
> +// LunarGLASS: An Open Modular Shader Compiler Architecture
> +// Copyright (C) 2010-2011 LunarG, Inc.
> +//
> +// This program is free software; you can redistribute it and/or
> +// modify it under the terms of the GNU General Public License
> +// as published by the Free Software Foundation; version 2 of the
> +// License.
> +//
> +// This program is distributed in the hope that it will be useful,
> +// but WITHOUT ANY WARRANTY; without even the implied warranty of
> +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +// GNU General Public License for more details.
> +//
> +// You should have received a copy of the GNU General Public License
> +// along with this program; if not, write to the Free Software
> +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> +// 02110-1301, USA.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// Author: Michael Ilseman, LunarG
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// Scalarize the IR.
> +//   * Loads of uniforms become multiple loadComponent calls
> +//
> +//   * Reads/writes become read/writeComponent calls
> +//
> +//   * Component-wise operations become multiple ops over each component
> +//
> +//   * Texture call become recomponsed texture calls
> +//
> +//   * Vector ops disappear, with their users referring to the scalarized
> +//   * components
> +//
> +//===----------------------------------------------------------------------===//
> +
> +/* THIS CODE IS DERIVED FROM GPL LLVM PTX BACKEND. CODE IS HERE:
> + * http://sourceforge.net/scm/?type=git&group_id=319085
> + * Note that however, the original author, Heldge Rhodin, granted me (Benjamin
> + * Segovia) the right to use another license for it (MIT here)
> + */
> +
> +#include "llvm/ADT/DenseMap.h"
> +#include "llvm/ADT/PostOrderIterator.h"
> +#include "llvm/Function.h"
> +#include "llvm/InstrTypes.h"
> +#include "llvm/Instructions.h"
> +#include "llvm/IntrinsicInst.h"
> +#include "llvm/Module.h"
> +#include "llvm/Pass.h"
> +#include "llvm/IRBuilder.h"
> +#include "llvm/Support/CallSite.h"
> +#include "llvm/Support/CFG.h"
> +#include "llvm/Support/raw_ostream.h"
> +
> +#include "llvm/llvm_gen_backend.hpp"
> +#include "ir/unit.hpp"
> +#include "sys/map.hpp"
> +
> +
> +using namespace llvm;
> +
> +namespace gbe {
> +
> +  struct VectorValues {
> +    VectorValues() : vals()
> +    { }
> +
> +    void setComponent(int c, llvm::Value* val)
> +    {
> +      assert(c >= 0 && c < 16 && "Out of bounds component");
> +      vals[c] = val;
> +    }
> +    llvm::Value* getComponent(int c)
> +    {
> +      assert(c >= 0 && c < 16 && "Out of bounds component");
> +      assert(vals[c] && "Requesting non-existing component");
> +      return vals[c];
> +    }
> +
> +    // {Value* x, Value* y, Value* z, Value* w}
> +    llvm::Value* vals[16];
> +  };
> +
> +  class Scalarize : public FunctionPass {
> +
> +  public:
> +    // Standard pass stuff
> +    static char ID;
> +
> +    Scalarize(ir::Unit& unit) : FunctionPass(ID), unit(unit)
> +    {
> +      initializeLoopInfoPass(*PassRegistry::getPassRegistry());
> +      initializeDominatorTreePass(*PassRegistry::getPassRegistry());
> +    }
> +
> +    virtual bool runOnFunction(Function&);
> +    void print(raw_ostream&, const Module* = 0) const;
> +    virtual void getAnalysisUsage(AnalysisUsage&) const;
> +
> +  protected:
> +    // An instruction is valid post-scalarization iff it is fully scalar or it
> +    // is a gla_loadn
> +    bool isValid(const Instruction*);
> +
> +    // Take an instruction that produces a vector, and scalarize it
> +    bool scalarize(Instruction*);
> +    bool scalarizePerComponent(Instruction*);
> +    bool scalarizeFuncCall(CallInst *);
> +    bool scalarizeLoad(LoadInst*);
> +    bool scalarizeStore(StoreInst*);
> +    //bool scalarizeIntrinsic(IntrinsicInst*);
> +    bool scalarizeExtract(ExtractElementInst*);
> +    bool scalarizeInsert(InsertElementInst*);
> +    bool scalarizeShuffleVector(ShuffleVectorInst*);
> +    bool scalarizePHI(PHINode*);
> +    void scalarizeArgs(Function& F);
> +    // ...
> +
> +    // Helpers to make the actual multiple scalar calls, one per
> +    // component. Updates the given VectorValues's components with the new
> +    // Values.
> +    void makeScalarizedCalls(Function*, ArrayRef<Value*>, int numComponents, VectorValues&);
> +
> +    void makePerComponentScalarizedCalls(Instruction*, ArrayRef<Value*>);
> +
> +    // Makes a scalar form of the given instruction: replaces the operands
> +    // and chooses a correct return type
> +    Instruction* createScalarInstruction(Instruction* inst, ArrayRef<Value*>);
> +
> +    // Gather the specified components in the given values. Returns the
> +    // component if the given value is a vector, or the scalar itself.
> +    void gatherComponents(int component, ArrayRef<Value*> args, SmallVectorImpl<Value*>& componentArgs);
> +
> +    // Get the assigned component for that value. If the value is a scalar,
> +    // returns the scalar. If it's a constant, returns that component. If
> +    // it's an instruction, returns the vectorValues of that instruction for
> +    // that component
> +    Value* getComponent(int component, Value*);
> +
> +    // Used for assertion purposes. Whether we can get the component out with
> +    // a getComponent call
> +    bool canGetComponent(Value*);
> +
> +    // Used for assertion purposes. Whether for every operand we can get
> +    // components with a getComponent call
> +    bool canGetComponentArgs(User*);
> +
> +    // Delete the instruction in the deadList
> +    void dce();
> +
> +
> +    int GetConstantInt(const Value* value);
> +    bool IsPerComponentOp(const Instruction* inst);
> +    bool IsPerComponentOp(const Value* value);
> +
> +    //these function used to add extract and insert instructions when load/store etc.
> +    void extractFromeVector(Value* insn);
> +    Value* InsertToVector(Value* insn, Value* vecValue);
> +
> +    Type* GetBasicType(Value* value) {
> +      return GetBasicType(value->getType());
> +    }
> +
> +    Type* GetBasicType(Type* type) {
> +      switch(type->getTypeID()) {
> +      case Type::VectorTyID:
> +      case Type::ArrayTyID:
> +        return GetBasicType(type->getContainedType(0));
> +      default:
> +        break;
> +      }
> +      return type;
> +    }
> +
> +    int GetComponentCount(const Type* type)  {
> +      if (type->getTypeID() == Type::VectorTyID)
> +        return llvm::dyn_cast<VectorType>(type)->getNumElements();
> +      else
> +        return 1;
> +    }
> +
> +    int GetComponentCount(const Value* value) {
> +      return GetComponentCount(value->getType());
> +    }
> +
> +    DenseMap<Value*, VectorValues> vectorVals;
> +    Module* module;
> +    IRBuilder<>* builder;
> +
> +    Type* intTy;
> +    Type* floatTy;
> +    ir::Unit &unit;
> +
> +    std::vector<Instruction*> deadList;
> +
> +    // List of vector phis that were not completely scalarized because some
> +    // of their operands hadn't before been visited (i.e. loop variant
> +    // variables)
> +    SmallVector<PHINode*, 16> incompletePhis;
> +  };
> +
> +  Value* Scalarize::getComponent(int component, Value* v)
> +  {
> +    assert(canGetComponent(v) && "getComponent called on unhandled vector");
> +
> +    if (v->getType()->isVectorTy()) {
> +      if (ConstantDataVector* c = dyn_cast<ConstantDataVector>(v)) {
> +        return c->getElementAsConstant(component);
> +      } else if (ConstantVector* c = dyn_cast<ConstantVector>(v)) {
> +        return c->getOperand(component);
> +      } else if (isa<ConstantAggregateZero>(v)) {
> +        return Constant::getNullValue(GetBasicType(v));
> +      } else if (isa<UndefValue>(v)) {
> +        return UndefValue::get(GetBasicType(v));
> +      } else {
> +        return vectorVals[v].getComponent(component);
> +      }
> +    } else {
> +      return v;
> +    }
> +  }
> +
> +  bool IsPerComponentOp(const llvm::Value* value)
> +  {
> +    const llvm::Instruction* inst = llvm::dyn_cast<const llvm::Instruction>(value);
> +    return inst && IsPerComponentOp(inst);
> +  }
> +
> +  bool Scalarize::IsPerComponentOp(const Instruction* inst)
> +  {
> +    //if (const IntrinsicInst* intr = dyn_cast<const IntrinsicInst>(inst))
> +    //    return IsPerComponentOp(intr);
> +
> +    if (inst->isTerminator())
> +        return false;
> +
> +    switch (inst->getOpcode()) {
> +
> +    // Cast ops are only per-component if they cast back to the same vector
> +    // width
> +    case Instruction::Trunc:
> +    case Instruction::ZExt:
> +    case Instruction::SExt:
> +    case Instruction::FPToUI:
> +    case Instruction::FPToSI:
> +    case Instruction::UIToFP:
> +    case Instruction::SIToFP:
> +    case Instruction::FPTrunc:
> +    case Instruction::FPExt:
> +    case Instruction::PtrToInt:
> +    case Instruction::IntToPtr:
> +    case Instruction::BitCast:
> +      return GetComponentCount(inst->getOperand(0)) == GetComponentCount(inst);
> +
> +    // Vector ops
> +    case Instruction::InsertElement:
> +    case Instruction::ExtractElement:
> +    case Instruction::ShuffleVector:
> +
> +    // Ways of accessing/loading/storing vectors
> +    case Instruction::ExtractValue:
> +    case Instruction::InsertValue:
> +
> +    // Memory ops
> +    case Instruction::Alloca:
> +    case Instruction::Load:
> +    case Instruction::Store:
> +    case Instruction::GetElementPtr:
> +    // Phis are a little special. We consider them not to be per-component
> +    // because the mechanism of choice is a single value (what path we took to
> +    // get here), and doesn't choose per-component (as select would). The caller
> +    // should know to handle phis specially
> +    case Instruction::PHI:
> +    // Call insts, conservatively, are not per-component
> +    case Instruction::Call:
> +    // Misc
> +    case Instruction::LandingPad:  //--- 3.0
> +    case Instruction::VAArg:
> +      return false;
> +    } // end of switch (inst->getOpcode())
> +
> +    return true;
> +  }
> +  int Scalarize::GetConstantInt(const Value* value)
> +  {
> +    const ConstantInt *constantInt = dyn_cast<ConstantInt>(value);
> +
> +    // this might still be a constant expression, rather than a numeric constant,
> +    // e.g., expression with undef's in it, so it was not folded
> +    if (! constantInt)
> +      NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("non-simple constant");
> +
> +    return constantInt->getValue().getSExtValue();
> +  }
> +  bool Scalarize::canGetComponent(Value* v)
> +  {
> +    if (v->getType()->isVectorTy()) {
> +      if (isa<ConstantDataVector>(v) || isa<ConstantVector>(v) || isa<ConstantAggregateZero>(v) || isa<UndefValue>(v)) {
> +        return true;
> +      } else {
> +        assert((isa<Instruction>(v) || isa<Argument>(v)) && "Non-constant non-instuction?");
> +        return vectorVals.count(v);
> +      }
> +    } else {
> +      return true;
> +    }
> +  }
> +
> +  bool Scalarize::canGetComponentArgs(User* u)
> +  {
> +    if (PHINode* phi = dyn_cast<PHINode>(u)) {
> +      for (unsigned int i = 0; i < phi->getNumIncomingValues(); ++i)
> +        if (!canGetComponent(phi->getIncomingValue(i)))
> +          return false;
> +    } else {
> +      for (User::op_iterator i = u->op_begin(), e = u->op_end(); i != e; ++i)
> +        if (!canGetComponent(*i))
> +          return false;
> +    }
> +    return true;
> +  }
> +
> +  void Scalarize::gatherComponents(int component, ArrayRef<Value*> args, SmallVectorImpl<Value*>& componentArgs)
> +  {
> +    componentArgs.clear();
> +    for (ArrayRef<Value*>::iterator i = args.begin(), e = args.end(); i != e; ++i)
> +      componentArgs.push_back(getComponent(component, *i));
> +  }
> +
> +  Instruction* Scalarize::createScalarInstruction(Instruction* inst, ArrayRef<Value*> args)
> +  {
> +    // TODO: Refine the below into one large switch
> +
> +    unsigned op = inst->getOpcode();
> +    if (inst->isCast()) {
> +      assert(args.size() == 1 && "incorrect number of arguments for cast op");
> +      return CastInst::Create((Instruction::CastOps)op, args[0], GetBasicType(inst));
> +    }
> +
> +    if (inst->isBinaryOp()) {
> +      assert(args.size() == 2 && "incorrect number of arguments for binary op");
> +      return BinaryOperator::Create((Instruction::BinaryOps)op, args[0], args[1]);
> +    }
> +
> +    if (PHINode* phi = dyn_cast<PHINode>(inst)) {
> +      PHINode* res = PHINode::Create(GetBasicType(inst), phi->getNumIncomingValues());
> +      assert(args.size() % 2 == 0 && "Odd number of arguments for a PHI");
> +
> +      // Loop over pairs of operands: [Value*, BasicBlock*]
> +      for (unsigned int i = 0; i < args.size(); i++) {
> +        BasicBlock* bb = phi->getIncomingBlock(i); //dyn_cast<BasicBlock>(args[i+1]);
> +        //assert(bb && "Non-basic block incoming block?");
> +        res->addIncoming(args[i], bb);
> +      }
> +
> +      return res;
> +    }
> +
> +    if (CmpInst* cmpInst = dyn_cast<CmpInst>(inst)) {
> +      assert(args.size() == 2 && "incorrect number of arguments for comparison");
> +      return CmpInst::Create(cmpInst->getOpcode(), cmpInst->getPredicate(), args[0], args[1]);
> +    }
> +
> +    if (isa<SelectInst>(inst)) {
> +      assert(args.size() == 3 && "incorrect number of arguments for select");
> +      return SelectInst::Create(args[0], args[1], args[2]);
> +    }
> +
> +    if (IntrinsicInst* intr = dyn_cast<IntrinsicInst>(inst)) {
> +      if (! IsPerComponentOp(inst))
> +        NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Scalarize instruction on a non-per-component intrinsic");
> +
> +      // TODO: Assumption is that all per-component intrinsics have all their
> +      // arguments be overloadable. Need to find some way to assert on this
> +      // assumption. This is due to how getDeclaration operates; it only takes
> +      // a list of types that fit overloadable slots.
> +      SmallVector<Type*, 8> tys(1, GetBasicType(inst->getType()));
> +      // Call instructions have the decl as a last argument, so skip it
> +      for (ArrayRef<Value*>::iterator i = args.begin(), e = args.end() - 1; i != e; ++i) {
> +        tys.push_back(GetBasicType((*i)->getType()));
> +      }
> +
> +      Function* f = Intrinsic::getDeclaration(module, intr->getIntrinsicID(), tys);
> +      return CallInst::Create(f, args);
> +    }
> +
> +    NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Currently unsupported instruction: ", inst->getOpcode(),
> +                     //             inst->getOpcodeName());
> +    return 0;
> +
> +  }
> +
> +
> +  void Scalarize::makeScalarizedCalls(Function* f, ArrayRef<Value*> args, int count, VectorValues& vVals)
> +  {
> +    assert(count > 0 && count <= 16 && "invalid number of vector components");
> +    for (int i = 0; i < count; ++i) {
> +      Value* res;
> +      SmallVector<Value*, 8> callArgs(args.begin(), args.end());
> +      callArgs.push_back(ConstantInt::get(intTy, i));
> +
> +      res = builder->CreateCall(f, callArgs);
> +      vVals.setComponent(i, res);
> +    }
> +  }
> +
> +  void Scalarize::makePerComponentScalarizedCalls(Instruction* inst, ArrayRef<Value*> args)
> +  {
> +    int count = GetComponentCount(inst);
> +    assert(count > 0 && count <= 16 && "invalid number of vector components");
> +    assert((inst->getNumOperands() == args.size() || isa<PHINode>(inst))
> +           && "not enough arguments passed for instruction");
> +
> +    VectorValues& vVals = vectorVals[inst];
> +
> +    for (int i = 0; i < count; ++i) {
> +      // Set this component of each arg
> +      SmallVector<Value*, 8> callArgs(args.size(), 0);
> +      gatherComponents(i, args, callArgs);
> +
> +      Instruction* res = createScalarInstruction(inst, callArgs);
> +
> +      vVals.setComponent(i, res);
> +      builder->Insert(res);
> +    }
> +  }
> +
> +  bool Scalarize::isValid(const Instruction* inst)
> +  {
> +    // The result
> +    if (inst->getType()->isVectorTy())
> +        return false;
> +
> +    // The arguments
> +    for (Instruction::const_op_iterator i = inst->op_begin(), e = inst->op_end(); i != e; ++i) {
> +      const Value* v = (*i);
> +      assert(v);
> +      if (v->getType()->isVectorTy())
> +        return false;
> +    }
> +
> +    return true;
> +  }
> +
> +  bool Scalarize::scalarize(Instruction* inst)
> +  {
> +    if (isValid(inst))
> +        return false;
> +
> +    assert(! vectorVals.count(inst) && "We've already scalarized this somehow?");
> +    assert((canGetComponentArgs(inst) || isa<PHINode>(inst)) &&
> +           "Scalarizing an op whose arguments haven't been scalarized ");
> +    builder->SetInsertPoint(inst);
> +
> +    if (IsPerComponentOp(inst))
> +      return scalarizePerComponent(inst);
> +
> +    if (LoadInst* ld = dyn_cast<LoadInst>(inst))
> +      return scalarizeLoad(ld);
> +
> +    if (CallInst* call = dyn_cast<CallInst>(inst))
> +      return scalarizeFuncCall(call);
> +
> +    if (ExtractElementInst* extr = dyn_cast<ExtractElementInst>(inst))
> +      return scalarizeExtract(extr);
> +
> +    if (InsertElementInst* ins = dyn_cast<InsertElementInst>(inst))
> +      return scalarizeInsert(ins);
> +
> +    if (ShuffleVectorInst* sv = dyn_cast<ShuffleVectorInst>(inst))
> +      return scalarizeShuffleVector(sv);
> +
> +    if (PHINode* phi = dyn_cast<PHINode>(inst))
> +      return scalarizePHI(phi);
> +
> +    if (isa<ExtractValueInst>(inst) || isa<InsertValueInst>(inst))
> +      // TODO: need to come up with a struct/array model for scalarization
> +      NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Scalarizing struct/array ops");
> +
> +    if (StoreInst* st = dyn_cast<StoreInst>(inst))
> +      return scalarizeStore(st);
> +
> +    NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Currently unhandled instruction ", inst->getOpcode(), inst->getOpcodeName());
> +    return false;
> +  }
> +
> +  bool Scalarize::scalarizeShuffleVector(ShuffleVectorInst* sv)
> +  {
> +    //     %res = shufflevector <n x ty> %foo, <n x ty> %bar, <n x i32> <...>
> +    // ==> nothing (just make a new VectorValues with the new components)
> +    VectorValues& vVals = vectorVals[sv];
> +
> +    int size = GetComponentCount(sv);
> +    int srcSize = GetComponentCount(sv->getOperand(0)->getType());
> +
> +    for (int i = 0; i < size; ++i) {
> +      int select = sv->getMaskValue(i);
> +
> +      if (select < 0) {
> +        vVals.setComponent(i, UndefValue::get(GetBasicType(sv->getOperand(0))));
> +        continue;
> +      }
> +
> +      // Otherwise look up the corresponding component from the correct
> +      // source.
> +      Value* selectee;
> +      if (select < srcSize) {
> +        selectee = sv->getOperand(0);
> +      } else {
> +        // Choose from the second operand
> +        select -= srcSize;
> +        selectee = sv->getOperand(1);
> +      }
> +
> +      vVals.setComponent(i, getComponent(select, selectee));
> +    }
> +
> +    return true;
> +  }
> +
> +  bool Scalarize::scalarizePerComponent(Instruction* inst)
> +  {
> +    //     dst  = op <n x ty> %foo, <n x ty> %bar
> +    // ==> dstx = op ty %foox, ty %barx
> +    //     dsty = op ty %fooy, ty %bary
> +    //     ...
> +
> +    SmallVector<Value*, 16> args(inst->op_begin(), inst->op_end());
> +
> +    makePerComponentScalarizedCalls(inst, args);
> +
> +    return true;
> +  }
> +
> +  bool Scalarize::scalarizePHI(PHINode* phi)
> +  {
> +    //     dst = phi <n x ty> [ %foo, %bb1 ], [ %bar, %bb2], ...
> +    // ==> dstx = phi ty [ %foox, %bb1 ], [ %barx, %bb2], ...
> +    //     dsty = phi ty [ %fooy, %bb1 ], [ %bary, %bb2], ...
> +
> +    // If the scalar values are all known up-front, then just make the full
> +    // phinode now. If they are not yet known (phinode for a loop variant
> +    // variable), then defer the arguments until later
> +
> +    if (canGetComponentArgs(phi)) {
> +      SmallVector<Value*, 8> args(phi->op_begin(), phi->op_end());
> +      makePerComponentScalarizedCalls(phi, args);
> +    } else {
> +      makePerComponentScalarizedCalls(phi, ArrayRef<Value*>());
> +      incompletePhis.push_back(phi);
> +    }
> +
> +    return true;
> +  }
> +
> +  void Scalarize::extractFromeVector(Value* insn) {
> +    VectorValues& vVals = vectorVals[insn];
> +
> +    for (int i = 0; i < GetComponentCount(insn); ++i) {
> +      Value *cv = ConstantInt::get(intTy, i);
> +      Value *EI = builder->CreateExtractElement(insn, cv);
> +      vVals.setComponent(i, EI);
> +      //unit.fakeInsnMap[EI] = insn;
> +      unit.newValueProxy(insn, EI, i, 0);
> +    }
> +  }
> +
> +  Value* Scalarize::InsertToVector(Value * insn, Value* vecValue) {
> +    //VectorValues& vVals = vectorVals[writeValue];
> +    //unit.vecValuesMap[call] = vectorVals[writeValue];
> +
> +    // Add fake InsertElement instructions to avoid being removed by dead code elimination
> +    Value *II = NULL;
> +    for (int i = 0; i < GetComponentCount(vecValue); ++i) {
> +      Value *vec = II ? II : UndefValue::get(vecValue->getType());
> +      Value *cv = ConstantInt::get(intTy, i);
> +      II = builder->CreateInsertElement(vec, getComponent(i, vecValue), cv);
> +      //unit.vecValuesMap[insn].setComponent(i, getComponent(i, writeValue));
> +      //unit.newValueProxy(getComponent(i, vecValue), vecValue, 0, i);
> +      //unit.fakeInsnMap[II] = insn;
> +    }
> +
> +    for (int i = 0; i < GetComponentCount(vecValue); ++i) {
> +      unit.newValueProxy(getComponent(i, vecValue), II, 0, i);
> +    }
> +    return II;
> +  }
> +
> +  bool Scalarize::scalarizeFuncCall(CallInst* call) {
> +    if (Function *F = call->getCalledFunction()) {
> +      if (F->getIntrinsicID() != 0) {   //Intrinsic functions
> +        NOT_IMPLEMENTED;
> +      } else {
> +        Value *Callee = call->getCalledValue();
> +        const std::string fnName = Callee->getName();
> +        auto it = instrinsicMap.map.find(fnName);
> +        GBE_ASSERT(it != instrinsicMap.map.end());
> +
> +        // Get the function arguments
> +        CallSite CS(call);
> +        CallSite::arg_iterator CI = CS.arg_begin() + 3;
> +
> +        switch (it->second) {
> +          default: break;
> +          case GEN_OCL_READ_IMAGE0:
> +          case GEN_OCL_READ_IMAGE1:
> +          case GEN_OCL_READ_IMAGE2:
> +          case GEN_OCL_READ_IMAGE3:
> +          case GEN_OCL_READ_IMAGE4:
> +          case GEN_OCL_READ_IMAGE5:
> +          case GEN_OCL_READ_IMAGE10:
> +          case GEN_OCL_READ_IMAGE11:
> +          case GEN_OCL_READ_IMAGE12:
> +          case GEN_OCL_READ_IMAGE13:
> +          case GEN_OCL_READ_IMAGE14:
> +          case GEN_OCL_READ_IMAGE15:
> +          {
> +            extractFromeVector(call);
> +            break;
> +          }
> +          case GEN_OCL_WRITE_IMAGE10:
> +          case GEN_OCL_WRITE_IMAGE11:
> +          case GEN_OCL_WRITE_IMAGE12:
> +          case GEN_OCL_WRITE_IMAGE13:
> +          case GEN_OCL_WRITE_IMAGE14:
> +          case GEN_OCL_WRITE_IMAGE15:
> +            CI++;
> +          case GEN_OCL_WRITE_IMAGE0:
> +          case GEN_OCL_WRITE_IMAGE1:
> +          case GEN_OCL_WRITE_IMAGE2:
> +          case GEN_OCL_WRITE_IMAGE3:
> +          case GEN_OCL_WRITE_IMAGE4:
> +          case GEN_OCL_WRITE_IMAGE5:
> +          {
> +            *CI = InsertToVector(call, *CI);
> +            break;
> +          }
> +        }
> +      }
> +    }
> +    return false;
> +  }
> +
> +  bool Scalarize::scalarizeLoad(LoadInst* ld)
> +  {
> +    extractFromeVector(ld);
> +    return false;
> +  }
> +
> +  bool Scalarize::scalarizeStore(StoreInst* st) {
> +    st->setOperand(0, InsertToVector(st, st->getValueOperand()));
> +    return false;
> +  }
> +
> +  bool Scalarize::scalarizeExtract(ExtractElementInst* extr)
> +  {
> +    //     %res = extractelement <n x ty> %foo, %i
> +    // ==> nothing (just use %foo's %ith component instead of %res)
> +
> +    if (! isa<Constant>(extr->getOperand(1))) {
> +        // TODO: Variably referenced components. Probably handle/emulate through
> +        // a series of selects.
> +        NOT_IMPLEMENTED; //gla::UnsupportedFunctionality("Variably referenced vector components");
> +    }
> +    //if (isa<Argument>(extr->getOperand(0)))
> +    //  return false;
> +    int component = GetConstantInt(extr->getOperand(1));
> +    Value* v = getComponent(component, extr->getOperand(0));
> +    if(extr == v)
> +      return false;
> +    extr->replaceAllUsesWith(v);
> +
> +    return true;
> +  }
> +
> +  bool Scalarize::scalarizeInsert(InsertElementInst* ins)
> +  {
> +    //     %res = insertelement <n x ty> %foo, ty %elt, i32 %i
> +    // ==> nothing (just make a new VectorValues with the new component)
> +
> +    if (! isa<Constant>(ins->getOperand(2))) {
> +      // TODO: Variably referenced components. Probably handle/emulate through
> +      // a series of selects.
> +      NOT_IMPLEMENTED;   //gla::UnsupportedFunctionality("Variably referenced vector components");
> +    }
> +
> +    int component = GetConstantInt(ins->getOperand(2));
> +
> +    VectorValues& vVals = vectorVals[ins];
> +    for (int i = 0; i < GetComponentCount(ins); ++i) {
> +      vVals.setComponent(i, i == component ? ins->getOperand(1)
> +                                           : getComponent(i, ins->getOperand(0)));
> +    }
> +
> +    return true;
> +  }
> +
> +  void Scalarize::scalarizeArgs(Function& F)  {
> +    if (F.arg_empty())
> +      return;
> +    ReversePostOrderTraversal<Function*> rpot(&F);
> +    BasicBlock::iterator instI = (*rpot.begin())->begin();
> +    builder->SetInsertPoint(instI);
> +
> +    Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
> +
> +#if LLVM_VERSION_MINOR <= 1
> +    const AttrListPtr &PAL = F.getAttributes();
> +    uint32_t argID = 1; // Start at one actually
> +    for (; I != E; ++I, ++argID) {
> +#else
> +    for (; I != E; ++I) {
> +#endif /* LLVM_VERSION_MINOR <= 1 */
> +      Type *type = I->getType();
> +
> +      if(type->isVectorTy())
> +        extractFromeVector(I);
> +    }
> +    return;
> +  }
> +
> +  bool Scalarize::runOnFunction(Function& F)
> +  {
> +    switch (F.getCallingConv()) {
> +    case CallingConv::PTX_Device:
> +      return false;
> +    case CallingConv::PTX_Kernel:
> +      break;
> +    default: GBE_ASSERTM(false, "Unsupported calling convention");
> +    }
> +
> +    bool changed = false;
> +    module = F.getParent();
> +    intTy = IntegerType::get(module->getContext(), 32);
> +    floatTy = Type::getFloatTy(module->getContext());
> +    builder = new IRBuilder<>(module->getContext());
> +
> +    scalarizeArgs(F);
> +
> +    typedef ReversePostOrderTraversal<Function*> RPOTType;
> +    RPOTType rpot(&F);
> +    for (RPOTType::rpo_iterator bbI = rpot.begin(), bbE = rpot.end(); bbI != bbE; ++bbI) {
> +      for (BasicBlock::iterator instI = (*bbI)->begin(), instE = (*bbI)->end(); instI != instE; ++instI) {
> +        bool scalarized = scalarize(instI);
> +        if (scalarized) {
> +          changed = true;
> +          // TODO: uncomment when done
> +          deadList.push_back(instI);
> +        }
> +      }
> +    }
> +
> +    // Fill in the incomplete phis
> +    for (SmallVectorImpl<PHINode*>::iterator phiI = incompletePhis.begin(), phiE = incompletePhis.end();
> +       phiI != phiE; ++phiI) {
> +      assert(canGetComponentArgs(*phiI) && "Phi's operands never scalarized");
> +
> +      // Fill in each component of this phi
> +      VectorValues& vVals = vectorVals[*phiI];
> +      for (int c = 0; c < GetComponentCount(*phiI); ++c) {
> +        PHINode* compPhi = dyn_cast<PHINode>(vVals.getComponent(c));
> +        assert(compPhi && "Vector phi got scalarized to non-phis?");
> +
> +        // Loop over pairs of operands: [Value*, BasicBlock*]
> +        for (unsigned int i = 0; i < (*phiI)->getNumOperands(); i++) {
> +          BasicBlock* bb = (*phiI)->getIncomingBlock(i);
> +          assert(bb && "Non-basic block incoming block?");
> +          compPhi->addIncoming(getComponent(c, (*phiI)->getOperand(i)), bb);
> +        }
> +      }
> +    }
> +
> +    dce();
> +
> +    delete builder;
> +    builder = 0;
> +
> +    return changed;
> +  }
> +
> +  void Scalarize::dce()
> +  {
> +    //two passes delete for some phinode
> +    for (std::vector<Instruction*>::reverse_iterator i = deadList.rbegin(), e = deadList.rend(); i != e; ++i) {
> +      (*i)->dropAllReferences();
> +      if((*i)->use_empty())
> +        (*i)->eraseFromParent();
> +    }
> +    for (std::vector<Instruction*>::reverse_iterator i = deadList.rbegin(), e = deadList.rend(); i != e; ++i) {
> +      if((*i)->getParent())
> +        (*i)->eraseFromParent();
> +    }
> +    deadList.clear();
> +  }
> +
> +  void Scalarize::getAnalysisUsage(AnalysisUsage& AU) const
> +  {
> +  }
> +
> +  void Scalarize::print(raw_ostream&, const Module*) const
> +  {
> +      return;
> +  }
> +  FunctionPass* createScalarizePass(ir::Unit &unit)
> +  {
> +      return new Scalarize(unit);
> +  }
> +  char Scalarize::ID = 0;
> +
> +} // end namespace
> diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
> index ea3d9eb..559cde0 100644
> --- a/backend/src/llvm/llvm_to_gen.cpp
> +++ b/backend/src/llvm/llvm_to_gen.cpp
> @@ -69,6 +69,7 @@ namespace gbe
>      // Print the code before further optimizations
>      if (OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS)
>        passes.add(createPrintModulePass(&*o));
> +    passes.add(createScalarizePass(unit));        // Expand all vector ops
>      passes.add(createScalarReplAggregatesPass()); // Break up allocas
>      passes.add(createRemoveGEPPass(unit));
>      passes.add(createConstantPropagationPass());
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list