[Beignet] [PATCH 2/2] GBE: expand large integer instructions

Mon Feb 2 22:43:25 PST 2015

The patch LGTM. So the llvm_legalize.cpp should be deleted now, right?

On Tue, Jan 27, 2015 at 04:38:25PM +0800, Ruiling Song wrote:
> The pass is also from PNaCl. which lower large integer into smaller ones.
> But different from previous legalize pass. It is easy to handle various
> instructions like load or phi with large integer operand.
> 
> I find that instruction combine may make me hard to totally eliminate
> empty block. As CFG simplify pass may generate switch-case when preventing
> empty block. And Switch lower pass after CFG simplify may make empty block
> still exist in Gen IR. So, I temporarily disable the empty block check in backend.
> 
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
>  backend/src/CMakeLists.txt               |    1 +
>  backend/src/ir/context.cpp               |    3 +-
>  backend/src/llvm/ExpandLargeIntegers.cpp |  764 ++++++++++++++++++++++++++++++
>  backend/src/llvm/llvm_gen_backend.hpp    |    1 +
>  backend/src/llvm/llvm_to_gen.cpp         |   21 +-
>  5 files changed, 779 insertions(+), 11 deletions(-)
>  create mode 100644 backend/src/llvm/ExpandLargeIntegers.cpp
> 
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
> index 907d8a3..04f3918 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -86,6 +86,7 @@ set (GBE_SRC
>      llvm/llvm_printf_parser.cpp
>      llvm/ExpandConstantExpr.cpp
>      llvm/ExpandUtils.cpp
> +    llvm/ExpandLargeIntegers.cpp
>      llvm/llvm_to_gen.cpp
>      llvm/llvm_loadstore_optimization.cpp
>      llvm/llvm_gen_backend.hpp
> diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp
> index 875a529..2412fe9 100644
> --- a/backend/src/ir/context.cpp
> +++ b/backend/src/ir/context.cpp
> @@ -76,7 +76,8 @@ namespace ir {
>      // function
>      lowerReturn(unit, fn->getName());
>      // check if there is empty labels at first
> -    fn->checkEmptyLabels();
> +    // FIXME: I don't find a way to elimimate all empty blocks. temporary disable this check
> +    //fn->checkEmptyLabels();
>      // Properly order labels and compute the CFG, it's needed by FunctionArgumentLower
>      fn->sortLabels();
>      fn->computeCFG();
> diff --git a/backend/src/llvm/ExpandLargeIntegers.cpp b/backend/src/llvm/ExpandLargeIntegers.cpp
> new file mode 100644
> index 0000000..cbb2708
> --- /dev/null
> +++ b/backend/src/llvm/ExpandLargeIntegers.cpp
> @@ -0,0 +1,764 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> + *
> + */
> +
> +// Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign.
> +// All rights reserved.
> +//
> +// Developed by:
> +//
> +//    LLVM Team
> +//
> +//    University of Illinois at Urbana-Champaign
> +//
> +//    http://llvm.org
> +//
> +// Permission is hereby granted, free of charge, to any person obtaining a copy of
> +// this software and associated documentation files (the "Software"), to deal with
> +// the Software without restriction, including without limitation the rights to
> +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
> +// of the Software, and to permit persons to whom the Software is furnished to do
> +// so, subject to the following conditions:
> +//
> +//    * Redistributions of source code must retain the above copyright notice,
> +//      this list of conditions and the following disclaimers.
> +//
> +//   * Redistributions in binary form must reproduce the above copyright notice,
> +//      this list of conditions and the following disclaimers in the
> +//      documentation and/or other materials provided with the distribution.
> +//
> +//    * Neither the names of the LLVM Team, University of Illinois at
> +//      Urbana-Champaign, nor the names of its contributors may be used to
> +//      endorse or promote products derived from this Software without specific
> +//      prior written permission.
> +//
> +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
> +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
> +// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
> +// SOFTWARE.
> +
> +//===- ExpandLargeIntegers.cpp - Expand illegal integers for PNaCl ABI ----===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License.
> +//
> +// A limited set of transformations to expand illegal-sized int types.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// Legal sizes for the purposes of expansion are anything 64 bits or less.
> +// Operations on large integers are split into operations on smaller-sized
> +// integers. The low parts should always be powers of 2, but the high parts may
> +// not be. A subsequent pass can promote those. For now this pass only intends
> +// to support the uses generated by clang, which is basically just for large
> +// bitfields.
> +//
> +// Limitations:
> +// 1) It can't change function signatures or global variables.
> +// 3) Doesn't support mul, div/rem, switch.
> +// 4) Doesn't handle arrays or structs (or GEPs) with illegal types.
> +// 5) Doesn't handle constant expressions (it also doesn't produce them, so it
> +//    can run after ExpandConstantExpr).
> +//
> +// The PNaCl version does not handle bitcast between vector and large integer.
> +// So I develop the bitcast from/to vector logic.
> +// TODO: 1. When we do lshr/trunc, and we know it is cast from a vector, we can
> +//          optimize it to extractElement.
> +//       2. OR x, 0 can be optimized as x. And x, 0 can be optimized as 0.
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/ADT/DenseMap.h"
> +#include "llvm/ADT/PostOrderIterator.h"
> +#include "llvm/ADT/STLExtras.h"
> +#include "llvm/ADT/SmallVector.h"
> +#include "llvm/IR/CFG.h"
> +#include "llvm/IR/DataLayout.h"
> +#include "llvm/IR/DerivedTypes.h"
> +#include "llvm/IR/Function.h"
> +#include "llvm/IR/IRBuilder.h"
> +#include "llvm/IR/Instructions.h"
> +#include "llvm/Pass.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/Support/MathExtras.h"
> +#include "llvm/Support/raw_ostream.h"
> +#include "llvm_gen_backend.hpp"
> +
> +using namespace llvm;
> +
> +#define DEBUG_TYPE "nacl-expand-ints"
> +
> +// Break instructions up into no larger than 64-bit chunks.
> +static const unsigned kChunkBits = 64;
> +static const unsigned kChunkBytes = kChunkBits / CHAR_BIT;
> +
> +namespace {
> +class ExpandLargeIntegers : public FunctionPass {
> +public:
> +  static char ID;
> +  ExpandLargeIntegers() : FunctionPass(ID) {
> +  }
> +  bool runOnFunction(Function &F) override;
> +};
> +
> +template <typename T> struct LoHiPair {
> +  T Lo, Hi;
> +  LoHiPair() : Lo(), Hi() {}
> +  LoHiPair(T Lo, T Hi) : Lo(Lo), Hi(Hi) {}
> +};
> +typedef LoHiPair<IntegerType *> TypePair;
> +typedef LoHiPair<Value *> ValuePair;
> +typedef LoHiPair<unsigned> AlignPair;
> +
> +struct VectorElement {
> +  Value *parent;
> +  unsigned childId;
> +  VectorElement() : parent(NULL), childId(0) {}
> +  VectorElement(Value *p, unsigned i) : parent(p), childId(i) {}
> +};
> +
> +// Information needed to patch a phi node which forward-references a value.
> +struct ForwardPHI {
> +  Value *Val;
> +  PHINode *Lo, *Hi;
> +  unsigned ValueNumber;
> +  ForwardPHI(Value *Val, PHINode *Lo, PHINode *Hi, unsigned ValueNumber)
> +      : Val(Val), Lo(Lo), Hi(Hi), ValueNumber(ValueNumber) {}
> +};
> +}
> +
> +char ExpandLargeIntegers::ID = 0;
> +
> +static bool isLegalBitSize(unsigned Bits) {
> +  assert(Bits && "Can't have zero-size integers");
> +  return Bits <= kChunkBits;
> +}
> +
> +static TypePair getExpandedIntTypes(Type *Ty) {
> +  unsigned BitWidth = Ty->getIntegerBitWidth();
> +  assert(!isLegalBitSize(BitWidth));
> +  return TypePair(IntegerType::get(Ty->getContext(), kChunkBits),
> +                  IntegerType::get(Ty->getContext(), BitWidth - kChunkBits));
> +}
> +
> +// Return true if Val is an int which should be converted.
> +static bool shouldConvert(const Value *Val) {
> +  Type *Ty = Val->getType();
> +  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
> +    return !isLegalBitSize(ITy->getBitWidth());
> +  return false;
> +}
> +
> +// Return a pair of constants expanded from C.
> +static ValuePair expandConstant(Constant *C) {
> +  assert(shouldConvert(C));
> +  TypePair ExpandedTypes = getExpandedIntTypes(C->getType());
> +  if (isa<UndefValue>(C)) {
> +    return ValuePair(UndefValue::get(ExpandedTypes.Lo),
> +                     UndefValue::get(ExpandedTypes.Hi));
> +  } else if (ConstantInt *CInt = dyn_cast<ConstantInt>(C)) {
> +    Constant *ShiftAmt = ConstantInt::get(
> +        CInt->getType(), ExpandedTypes.Lo->getBitWidth(), false);
> +    return ValuePair(
> +        ConstantExpr::getTrunc(CInt, ExpandedTypes.Lo),
> +        ConstantExpr::getTrunc(ConstantExpr::getLShr(CInt, ShiftAmt),
> +                               ExpandedTypes.Hi));
> +  }
> +  errs() << "Value: " << *C << "\n";
> +  report_fatal_error("Unexpected constant value");
> +}
> +
> +template <typename T>
> +static AlignPair getAlign(const DataLayout &DL, T *I, Type *PrefAlignTy) {
> +  unsigned LoAlign = I->getAlignment();
> +  if (LoAlign == 0)
> +    LoAlign = DL.getPrefTypeAlignment(PrefAlignTy);
> +  unsigned HiAlign = MinAlign(LoAlign, kChunkBytes);
> +  return AlignPair(LoAlign, HiAlign);
> +}
> +
> +namespace {
> +// Holds the state for converting/replacing values. We visit instructions in
> +// reverse post-order, phis are therefore the only instructions which can be
> +// visited before the value they use.
> +class ConversionState {
> +public:
> +  // Return the expanded values for Val.
> +  ValuePair getConverted(Value *Val) {
> +    assert(shouldConvert(Val));
> +    // Directly convert constants.
> +    if (Constant *C = dyn_cast<Constant>(Val))
> +      return expandConstant(C);
> +    if (RewrittenIllegals.count(Val)) {
> +      ValuePair Found = RewrittenIllegals[Val];
> +      if (RewrittenLegals.count(Found.Lo))
> +        Found.Lo = RewrittenLegals[Found.Lo];
> +      if (RewrittenLegals.count(Found.Hi))
> +        Found.Hi = RewrittenLegals[Found.Hi];
> +      return Found;
> +    }
> +    errs() << "Value: " << *Val << "\n";
> +    report_fatal_error("Expanded value not found in map");
> +  }
> +
> +  // Returns whether a converted value has been recorded. This is only useful
> +  // for phi instructions: they can be encountered before the incoming
> +  // instruction, whereas RPO order guarantees that other instructions always
> +  // use converted values.
> +  bool hasConverted(Value *Val) {
> +    assert(shouldConvert(Val));
> +    return dyn_cast<Constant>(Val) || RewrittenIllegals.count(Val);
> +  }
> +
> +  // Record a forward phi, temporarily setting it to use Undef. This will be
> +  // patched up at the end of RPO.
> +  ValuePair recordForwardPHI(Value *Val, PHINode *Lo, PHINode *Hi,
> +                             unsigned ValueNumber) {
> +    DEBUG(dbgs() << "\tRecording as forward PHI\n");
> +    ForwardPHIs.push_back(ForwardPHI(Val, Lo, Hi, ValueNumber));
> +    return ValuePair(UndefValue::get(Lo->getType()),
> +                     UndefValue::get(Hi->getType()));
> +  }
> +
> +  void recordConverted(Instruction *From, const ValuePair &To) {
> +    DEBUG(dbgs() << "\tTo:  " << *To.Lo << "\n");
> +    DEBUG(dbgs() << "\tAnd: " << *To.Hi << "\n");
> +    ToErase.push_back(From);
> +    RewrittenIllegals[From] = To;
> +  }
> +
> +  // Replace the uses of From with To, give From's name to To, and mark To for
> +  // deletion.
> +  void recordConverted(Instruction *From, Value *To) {
> +    assert(!shouldConvert(From));
> +    DEBUG(dbgs() << "\tTo:  " << *To << "\n");
> +    ToErase.push_back(From);
> +    // From does not produce an illegal value, update its users in place.
> +    From->replaceAllUsesWith(To);
> +    To->takeName(From);
> +    RewrittenLegals[From] = To;
> +  }
> +
> +  void patchForwardPHIs() {
> +    DEBUG(if (!ForwardPHIs.empty()) dbgs() << "Patching forward PHIs:\n");
> +    for (ForwardPHI &F : ForwardPHIs) {
> +      ValuePair Ops = getConverted(F.Val);
> +      F.Lo->setIncomingValue(F.ValueNumber, Ops.Lo);
> +      F.Hi->setIncomingValue(F.ValueNumber, Ops.Hi);
> +      DEBUG(dbgs() << "\t" << *F.Lo << "\n\t" << *F.Hi << "\n");
> +    }
> +  }
> +
> +  void eraseReplacedInstructions() {
> +    for (Instruction *I : ToErase)
> +      I->dropAllReferences();
> +
> +    for (Instruction *I : ToErase)
> +      I->eraseFromParent();
> +  }
> +
> +  void addEraseCandidate(Instruction *c) {
> +    ToErase.push_back(c);
> +  }
> +
> +  void appendElement(Value *v, Value *e) {
> +    if (ExtractElement.count(v) == 0) {
> +      SmallVector<Value *, 16> tmp;
> +      tmp.push_back(e);
> +      ExtractElement[v] = tmp;
> +    } else
> +      ExtractElement[v].push_back(e);
> +  }
> +
> +  Value *getElement(Value *v, unsigned id) {
> +    return (ExtractElement[v])[id];
> +  }
> +  VectorElement &getVectorMap(Value *child) {
> +    return VectorIllegals[child];
> +  }
> +
> +  bool convertedVector(Value *vector) {
> +    return VectorIllegals.count(vector) > 0 ? true : false;
> +  }
> +
> +  void recordVectorMap(Value *child, VectorElement elem) {
> +    VectorIllegals[child] = elem;
> +  }
> +
> +private:
> +  // Maps illegal values to their new converted lo/hi values.
> +  DenseMap<Value *, ValuePair> RewrittenIllegals;
> +  // Maps legal values to their new converted value.
> +  DenseMap<Value *, Value *> RewrittenLegals;
> +  // Illegal values which have already been converted, will be erased.
> +  SmallVector<Instruction *, 32> ToErase;
> +  // PHIs which were encountered but had forward references. They need to get
> +  // patched up after RPO traversal.
> +  SmallVector<ForwardPHI, 32> ForwardPHIs;
> +  // helpers to solve bitcasting from vector to illegal integer types
> +  // Maps a Value to its original Vector and elemId
> +  DenseMap<Value *, VectorElement> VectorIllegals;
> +  // cache the ExtractElement Values
> +  DenseMap<Value *, SmallVector<Value *, 16>> ExtractElement;
> +};
> +} // Anonymous namespace
> +
> +static Value *buildVectorOrScalar(ConversionState &State, IRBuilder<> &IRB, SmallVector<Value *, 16> Elements) {
> +  assert(!Elements.empty());
> +  Type *IntTy = IntegerType::get(IRB.getContext(), 32);
> +
> +  if (Elements.size() > 1) {
> +    Value * vec = NULL;
> +    unsigned ElemNo = Elements.size();
> +    Type *ElemTy = Elements[0]->getType();
> +    bool KeepInsert = isLegalBitSize(ElemTy->getIntegerBitWidth() * ElemNo);
> +    for (unsigned i = 0; i < ElemNo; ++i) {
> +      Value *tmp = vec ? vec : UndefValue::get(VectorType::get(ElemTy, ElemNo));
> +      Value *idx = ConstantInt::get(IntTy, i);
> +      vec = IRB.CreateInsertElement(tmp, Elements[i], idx);
> +      if (!KeepInsert) {
> +        State.addEraseCandidate(cast<Instruction>(vec));
> +      }
> +    }
> +    return vec;
> +  } else {
> +    return Elements[0];
> +  }
> +}
> +
> +void getSplitedValue(ConversionState &State, Value *Val, SmallVector<Value *, 16> &Result) {
> +  while (shouldConvert(Val)) {
> +    ValuePair Convert = State.getConverted(Val);
> +    Result.push_back(Convert.Lo);
> +    Val = Convert.Hi;
> +  }
> +  Result.push_back(Val);
> +}
> +
> +static void convertInstruction(Instruction *Inst, ConversionState &State,
> +                               const DataLayout &DL) {
> +  DEBUG(dbgs() << "Expanding Large Integer: " << *Inst << "\n");
> +  // Set the insert point *after* Inst, so that any instructions inserted here
> +  // will be visited again. That allows iterative expansion of types > i128.
> +  BasicBlock::iterator InsertPos(Inst);
> +  IRBuilder<> IRB(++InsertPos);
> +  StringRef Name = Inst->getName();
> +
> +  if (PHINode *Phi = dyn_cast<PHINode>(Inst)) {
> +    unsigned N = Phi->getNumIncomingValues();
> +    TypePair OpTys = getExpandedIntTypes(Phi->getIncomingValue(0)->getType());
> +    PHINode *Lo = IRB.CreatePHI(OpTys.Lo, N, Twine(Name + ".lo"));
> +    PHINode *Hi = IRB.CreatePHI(OpTys.Hi, N, Twine(Name + ".hi"));
> +    for (unsigned I = 0; I != N; ++I) {
> +      Value *InVal = Phi->getIncomingValue(I);
> +      BasicBlock *InBB = Phi->getIncomingBlock(I);
> +      // If the value hasn't already been converted then this is a
> +      // forward-reference PHI which needs to be patched up after RPO traversal.
> +      ValuePair Ops = State.hasConverted(InVal)
> +                          ? State.getConverted(InVal)
> +                          : State.recordForwardPHI(InVal, Lo, Hi, I);
> +      Lo->addIncoming(Ops.Lo, InBB);
> +      Hi->addIncoming(Ops.Hi, InBB);
> +    }
> +    State.recordConverted(Phi, ValuePair(Lo, Hi));
> +
> +  } else if (ZExtInst *ZExt = dyn_cast<ZExtInst>(Inst)) {
> +    Value *Operand = ZExt->getOperand(0);
> +    Type *OpTy = Operand->getType();
> +    TypePair Tys = getExpandedIntTypes(Inst->getType());
> +    Value *Lo, *Hi;
> +    if (OpTy->getIntegerBitWidth() <= kChunkBits) {
> +      Lo = IRB.CreateZExt(Operand, Tys.Lo, Twine(Name, ".lo"));
> +      Hi = ConstantInt::get(Tys.Hi, 0);
> +    } else {
> +      ValuePair Ops = State.getConverted(Operand);
> +      Lo = Ops.Lo;
> +      Hi = IRB.CreateZExt(Ops.Hi, Tys.Hi, Twine(Name, ".hi"));
> +    }
> +    State.recordConverted(ZExt, ValuePair(Lo, Hi));
> +
> +  } else if (TruncInst *Trunc = dyn_cast<TruncInst>(Inst)) {
> +    Value *Operand = Trunc->getOperand(0);
> +    assert(shouldConvert(Operand) && "TruncInst is expandable but not its op");
> +    TypePair OpTys = getExpandedIntTypes(Operand->getType());
> +    ValuePair Ops = State.getConverted(Operand);
> +    if (!shouldConvert(Inst)) {
> +      Value *NewInst = IRB.CreateTrunc(Ops.Lo, Trunc->getType(), Name);
> +      State.recordConverted(Trunc, NewInst);
> +    } else {
> +      TypePair Tys = getExpandedIntTypes(Trunc->getType());
> +      assert(Tys.Lo == OpTys.Lo);
> +      Value *Lo = Ops.Lo;
> +      Value *Hi = IRB.CreateTrunc(Ops.Hi, Tys.Hi, Twine(Name, ".hi"));
> +      State.recordConverted(Trunc, ValuePair(Lo, Hi));
> +    }
> +
> +  } else if (BitCastInst *Cast = dyn_cast<BitCastInst>(Inst)) {
> +    Value *Operand = Cast->getOperand(0);
> +    bool DstVec = Inst->getType()->isVectorTy();
> +
> +    Type *IntTy = IntegerType::get(Cast->getContext(), 32);
> +    if (DstVec) {
> +      // integer to vector, get all children and bitcast
> +      SmallVector<Value *, 16> Split;
> +      getSplitedValue(State, Operand, Split);
> +
> +      Value *vec = NULL;
> +      unsigned ElemNo = Split.size();
> +      Type *ElemTy = Split[0]->getType();
> +      for (unsigned i = 0; i < ElemNo; ++i) {
> +        Value *tmp = vec ? vec : UndefValue::get(VectorType::get(ElemTy, ElemNo));
> +        Value *idx = ConstantInt::get(IntTy, i);
> +        vec = IRB.CreateInsertElement(tmp, Split[i], idx);
> +      }
> +      if (vec->getType() != Cast->getType())
> +        vec = IRB.CreateBitCast(vec, Cast->getType());
> +      State.recordConverted(Cast, vec);
> +    } else {
> +      // vector to integer
> +      assert(Operand->getType()->isVectorTy());
> +      VectorType *VecTy = cast<VectorType>(Operand->getType());
> +      Type *LargeTy = Inst->getType();
> +      Type *ElemTy = VecTy->getElementType();
> +      unsigned ElemNo = VecTy->getNumElements();
> +      Value * VectorRoot = NULL;
> +      unsigned ChildIndex = 0;
> +
> +      if (State.convertedVector(Operand)) {
> +        VectorElement VE = State.getVectorMap(Operand);
> +        VectorRoot = VE.parent;
> +        ChildIndex = VE.childId;
> +      } else {
> +        for (unsigned i =0; i < ElemNo; i++)
> +          State.appendElement(Operand,
> +                              IRB.CreateExtractElement(Operand, ConstantInt::get(IntTy, i))
> +                             );
> +        VectorRoot = Operand;
> +      }
> +
> +      TypePair OpTys = getExpandedIntTypes(LargeTy);
> +      Value *Lo, *Hi;
> +      unsigned LowNo = OpTys.Lo->getIntegerBitWidth() / ElemTy->getIntegerBitWidth();
> +      unsigned HighNo = OpTys.Hi->getIntegerBitWidth() / ElemTy->getIntegerBitWidth();
> +
> +      SmallVector<Value *, 16> LoElems;
> +      for (unsigned i = 0; i < LowNo; ++i)
> +        LoElems.push_back(State.getElement(VectorRoot, i+ChildIndex));
> +
> +      Lo = IRB.CreateBitCast(buildVectorOrScalar(State, IRB, LoElems), OpTys.Lo, Twine(Name, ".lo"));
> +
> +      SmallVector<Value *, 16> HiElem;
> +      for (unsigned i = 0; i < HighNo; ++i)
> +        HiElem.push_back(State.getElement(VectorRoot, i+LowNo+ChildIndex));
> +
> +      Value *NewVec = buildVectorOrScalar(State, IRB, HiElem);
> +      Hi = IRB.CreateBitCast(NewVec, OpTys.Hi);
> +
> +      State.recordVectorMap(NewVec, VectorElement(VectorRoot, LowNo + ChildIndex));
> +      State.recordConverted(Cast, ValuePair(Lo, Hi));
> +    }
> +
> +  } else if (BinaryOperator *Binop = dyn_cast<BinaryOperator>(Inst)) {
> +    ValuePair Lhs = State.getConverted(Binop->getOperand(0));
> +    ValuePair Rhs = State.getConverted(Binop->getOperand(1));
> +    TypePair Tys = getExpandedIntTypes(Binop->getType());
> +    Instruction::BinaryOps Op = Binop->getOpcode();
> +    switch (Op) {
> +    case Instruction::And:
> +    case Instruction::Or:
> +    case Instruction::Xor: {
> +      Value *Lo = IRB.CreateBinOp(Op, Lhs.Lo, Rhs.Lo, Twine(Name, ".lo"));
> +      Value *Hi = IRB.CreateBinOp(Op, Lhs.Hi, Rhs.Hi, Twine(Name, ".hi"));
> +      State.recordConverted(Binop, ValuePair(Lo, Hi));
> +      break;
> +    }
> +
> +    case Instruction::Shl: {
> +      ConstantInt *ShlAmount = dyn_cast<ConstantInt>(Rhs.Lo);
> +      // TODO(dschuff): Expansion of variable-sized shifts isn't supported
> +      // because the behavior depends on whether the shift amount is less than
> +      // the size of the low part of the expanded type, and I haven't yet
> +      // figured out a way to do it for variable-sized shifts without splitting
> +      // the basic block. I don't believe it's actually necessary for
> +      // bitfields. Likewise for LShr below.
> +      if (!ShlAmount) {
> +        errs() << "Shift: " << *Binop << "\n";
> +        report_fatal_error("Expansion of variable-sized shifts of > 64-bit-"
> +                           "wide values is not supported");
> +      }
> +      unsigned ShiftAmount = ShlAmount->getZExtValue();
> +      if (ShiftAmount >= Binop->getType()->getIntegerBitWidth())
> +        ShiftAmount = 0; // Undefined behavior.
> +
> +      unsigned HiBits = Tys.Hi->getIntegerBitWidth();
> +      // |<------------Hi---------->|<-------Lo------>|
> +      // |                          |                 |
> +      // +--------+--------+--------+--------+--------+
> +      // |abcdefghijklmnopqrstuvwxyz|ABCDEFGHIJKLMNOPQ|
> +      // +--------+--------+--------+--------+--------+
> +      // Possible shifts:
> +      // |efghijklmnopqrstuvwxyzABCD|EFGHIJKLMNOPQ0000| Some Lo into Hi.
> +      // |vwxyzABCDEFGHIJKLMNOPQ0000|00000000000000000| Lo is 0, keep some Hi.
> +      // |DEFGHIJKLMNOPQ000000000000|00000000000000000| Lo is 0, no Hi left.
> +      Value *Lo, *Hi;
> +      if (ShiftAmount < kChunkBits) {
> +        Lo = IRB.CreateShl(Lhs.Lo, ShiftAmount, Twine(Name, ".lo"));
> +        Hi = IRB.CreateZExtOrTrunc(IRB.CreateLShr(Lhs.Lo,
> +                                                  kChunkBits - ShiftAmount,
> +                                                  Twine(Name, ".lo.shr")),
> +                                   Tys.Hi, Twine(Name, ".lo.ext"));
> +      } else {
> +        Lo = ConstantInt::get(Tys.Lo, 0);
> +        if (ShiftAmount == kChunkBits) {
> +          // Hi will be from Lo
> +          Hi = IRB.CreateZExtOrTrunc(Lhs.Lo, Tys.Hi, Twine(Name, ".lo.ext"));
> +        } else {
> +          Hi = IRB.CreateShl(
> +              IRB.CreateZExtOrTrunc(Lhs.Lo, Tys.Hi, Twine(Name, ".lo.ext")),
> +              ShiftAmount - kChunkBits, Twine(Name, ".lo.shl"));
> +        }
> +      }
> +      if (ShiftAmount < HiBits)
> +        Hi = IRB.CreateOr(
> +            Hi, IRB.CreateShl(Lhs.Hi, ShiftAmount, Twine(Name, ".hi.shl")),
> +            Twine(Name, ".or"));
> +      State.recordConverted(Binop, ValuePair(Lo, Hi));
> +      break;
> +    }
> +
> +    case Instruction::AShr:
> +    case Instruction::LShr: {
> +      ConstantInt *ShrAmount = dyn_cast<ConstantInt>(Rhs.Lo);
> +      // TODO(dschuff): Expansion of variable-sized shifts isn't supported
> +      // because the behavior depends on whether the shift amount is less than
> +      // the size of the low part of the expanded type, and I haven't yet
> +      // figured out a way to do it for variable-sized shifts without splitting
> +      // the basic block. I don't believe it's actually necessary for bitfields.
> +      if (!ShrAmount) {
> +        errs() << "Shift: " << *Binop << "\n";
> +        report_fatal_error("Expansion of variable-sized shifts of > 64-bit-"
> +                           "wide values is not supported");
> +      }
> +      bool IsArith = Op == Instruction::AShr;
> +      unsigned ShiftAmount = ShrAmount->getZExtValue();
> +      if (ShiftAmount >= Binop->getType()->getIntegerBitWidth())
> +        ShiftAmount = 0; // Undefined behavior.
> +      unsigned HiBitWidth = Tys.Hi->getIntegerBitWidth();
> +      // |<--Hi-->|<-------Lo------>|
> +      // |        |                 |
> +      // +--------+--------+--------+
> +      // |abcdefgh|ABCDEFGHIJKLMNOPQ|
> +      // +--------+--------+--------+
> +      // Possible shifts (0 is sign when doing AShr):
> +      // |0000abcd|defgABCDEFGHIJKLM| Some Hi into Lo.
> +      // |00000000|00abcdefgABCDEFGH| Hi is 0, keep some Lo.
> +      // |00000000|000000000000abcde| Hi is 0, no Lo left.
> +      Value *Lo, *Hi;
> +      if (ShiftAmount == 0) {
> +        Lo = Lhs.Lo; Hi = Lhs.Hi;
> +      } else {
> +        if (ShiftAmount < kChunkBits) {
> +          Lo = IRB.CreateShl(
> +              IsArith
> +                  ? IRB.CreateSExtOrTrunc(Lhs.Hi, Tys.Lo, Twine(Name, ".hi.ext"))
> +                  : IRB.CreateZExtOrTrunc(Lhs.Hi, Tys.Lo, Twine(Name, ".hi.ext")),
> +              kChunkBits - ShiftAmount, Twine(Name, ".hi.shl"));
> +
> +          Lo = IRB.CreateOr(
> +              Lo, IRB.CreateLShr(Lhs.Lo, ShiftAmount, Twine(Name, ".lo.shr")),
> +              Twine(Name, ".lo"));
> +        } else if (ShiftAmount == kChunkBits) {
> +          Lo = IsArith
> +                  ? IRB.CreateSExtOrTrunc(Lhs.Hi, Tys.Lo, Twine(Name, ".hi.ext"))
> +                  : IRB.CreateZExtOrTrunc(Lhs.Hi, Tys.Lo, Twine(Name, ".hi.ext"));
> +
> +        } else {
> +          Lo = IRB.CreateBinOp(Op, Lhs.Hi,
> +                               ConstantInt::get(Tys.Hi, ShiftAmount - kChunkBits),
> +                               Twine(Name, ".hi.shr"));
> +          Lo = IsArith
> +                   ? IRB.CreateSExtOrTrunc(Lo, Tys.Lo, Twine(Name, ".lo.ext"))
> +                   : IRB.CreateZExtOrTrunc(Lo, Tys.Lo, Twine(Name, ".lo.ext"));
> +        }
> +        if (ShiftAmount < HiBitWidth) {
> +          Hi = IRB.CreateBinOp(Op, Lhs.Hi, ConstantInt::get(Tys.Hi, ShiftAmount),
> +                               Twine(Name, ".hi"));
> +        } else {
> +          Hi = IsArith
> +                   ? IRB.CreateAShr(Lhs.Hi, HiBitWidth - 1, Twine(Name, ".hi"))
> +                   : ConstantInt::get(Tys.Hi, 0);
> +        }
> +      }
> +      State.recordConverted(Binop, ValuePair(Lo, Hi));
> +      break;
> +    }
> +
> +    case Instruction::Add:
> +    case Instruction::Sub: {
> +      Value *Lo, *Hi;
> +      if (Op == Instruction::Add) {
> +        Value *Limit = IRB.CreateSelect(
> +            IRB.CreateICmpULT(Lhs.Lo, Rhs.Lo, Twine(Name, ".cmp")), Rhs.Lo,
> +            Lhs.Lo, Twine(Name, ".limit"));
> +        // Don't propagate NUW/NSW to the lo operation: it can overflow.
> +        Lo = IRB.CreateBinOp(Op, Lhs.Lo, Rhs.Lo, Twine(Name, ".lo"));
> +        Value *Carry = IRB.CreateZExt(
> +            IRB.CreateICmpULT(Lo, Limit, Twine(Name, ".overflowed")), Tys.Hi,
> +            Twine(Name, ".carry"));
> +        // TODO(jfb) The hi operation could be tagged with NUW/NSW.
> +        Hi = IRB.CreateBinOp(
> +            Op, IRB.CreateBinOp(Op, Lhs.Hi, Rhs.Hi, Twine(Name, ".hi")), Carry,
> +            Twine(Name, ".carried"));
> +      } else {
> +        Value *Borrowed = IRB.CreateSExt(
> +            IRB.CreateICmpULT(Lhs.Lo, Rhs.Lo, Twine(Name, ".borrow")), Tys.Hi,
> +            Twine(Name, ".borrowing"));
> +        Lo = IRB.CreateBinOp(Op, Lhs.Lo, Rhs.Lo, Twine(Name, ".lo"));
> +        Hi = IRB.CreateBinOp(
> +            Instruction::Add,
> +            IRB.CreateBinOp(Op, Lhs.Hi, Rhs.Hi, Twine(Name, ".hi")), Borrowed,
> +            Twine(Name, ".borrowed"));
> +      }
> +      State.recordConverted(Binop, ValuePair(Lo, Hi));
> +      break;
> +    }
> +
> +    default:
> +      errs() << "Operation: " << *Binop << "\n";
> +      report_fatal_error("Unhandled BinaryOperator type in "
> +                         "ExpandLargeIntegers");
> +    }
> +
> +  } else if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
> +    Value *Op = Load->getPointerOperand();
> +    TypePair Tys = getExpandedIntTypes(Load->getType());
> +    AlignPair Align = getAlign(DL, Load, Load->getType());
> +    Value *Loty = IRB.CreateBitCast(Op, Tys.Lo->getPointerTo(),
> +                                    Twine(Op->getName(), ".loty"));
> +    Value *Lo =
> +        IRB.CreateAlignedLoad(Loty, Align.Lo, Twine(Load->getName(), ".lo"));
> +    Value *HiAddr =
> +        IRB.CreateConstGEP1_32(Loty, 1, Twine(Op->getName(), ".hi.gep"));
> +    Value *HiTy = IRB.CreateBitCast(HiAddr, Tys.Hi->getPointerTo(),
> +                                    Twine(Op->getName(), ".hity"));
> +    Value *Hi =
> +        IRB.CreateAlignedLoad(HiTy, Align.Hi, Twine(Load->getName(), ".hi"));
> +    State.recordConverted(Load, ValuePair(Lo, Hi));
> +
> +  } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
> +    Value *Ptr = Store->getPointerOperand();
> +    TypePair Tys = getExpandedIntTypes(Store->getValueOperand()->getType());
> +    ValuePair StoreVals = State.getConverted(Store->getValueOperand());
> +    AlignPair Align = getAlign(DL, Store, Store->getValueOperand()->getType());
> +    Value *Loty = IRB.CreateBitCast(Ptr, Tys.Lo->getPointerTo(),
> +                                    Twine(Ptr->getName(), ".loty"));
> +    Value *Lo = IRB.CreateAlignedStore(StoreVals.Lo, Loty, Align.Lo);
> +    Value *HiAddr =
> +        IRB.CreateConstGEP1_32(Loty, 1, Twine(Ptr->getName(), ".hi.gep"));
> +    Value *HiTy = IRB.CreateBitCast(HiAddr, Tys.Hi->getPointerTo(),
> +                                    Twine(Ptr->getName(), ".hity"));
> +    Value *Hi = IRB.CreateAlignedStore(StoreVals.Hi, HiTy, Align.Hi);
> +    State.recordConverted(Store, ValuePair(Lo, Hi));
> +
> +  } else if (ICmpInst *Icmp = dyn_cast<ICmpInst>(Inst)) {
> +    ValuePair Lhs = State.getConverted(Icmp->getOperand(0));
> +    ValuePair Rhs = State.getConverted(Icmp->getOperand(1));
> +    switch (Icmp->getPredicate()) {
> +    case CmpInst::ICMP_EQ:
> +    case CmpInst::ICMP_NE: {
> +      Value *Lo = IRB.CreateICmp(Icmp->getUnsignedPredicate(), Lhs.Lo, Rhs.Lo,
> +                                 Twine(Name, ".lo"));
> +      Value *Hi = IRB.CreateICmp(Icmp->getUnsignedPredicate(), Lhs.Hi, Rhs.Hi,
> +                                 Twine(Name, ".hi"));
> +      Value *Result =
> +          IRB.CreateBinOp(Instruction::And, Lo, Hi, Twine(Name, ".result"));
> +      State.recordConverted(Icmp, Result);
> +      break;
> +    }
> +
> +    // TODO(jfb): Implement the following cases.
> +    case CmpInst::ICMP_UGT:
> +    case CmpInst::ICMP_UGE:
> +    case CmpInst::ICMP_ULT:
> +    case CmpInst::ICMP_ULE:
> +    case CmpInst::ICMP_SGT:
> +    case CmpInst::ICMP_SGE:
> +    case CmpInst::ICMP_SLT:
> +    case CmpInst::ICMP_SLE:
> +      errs() << "Comparison: " << *Icmp << "\n";
> +      report_fatal_error("Comparisons other than equality not supported for"
> +                         "integer types larger than 64 bit");
> +    default:
> +      llvm_unreachable("Invalid integer comparison");
> +    }
> +
> +  } else if (SelectInst *Select = dyn_cast<SelectInst>(Inst)) {
> +    Value *Cond = Select->getCondition();
> +    ValuePair True = State.getConverted(Select->getTrueValue());
> +    ValuePair False = State.getConverted(Select->getFalseValue());
> +    Value *Lo = IRB.CreateSelect(Cond, True.Lo, False.Lo, Twine(Name, ".lo"));
> +    Value *Hi = IRB.CreateSelect(Cond, True.Hi, False.Hi, Twine(Name, ".hi"));
> +    State.recordConverted(Select, ValuePair(Lo, Hi));
> +
> +  } else {
> +    errs() << "Instruction: " << *Inst << "\n";
> +    report_fatal_error("Unhandle large integer expansion");
> +  }
> +}
> +
> +bool ExpandLargeIntegers::runOnFunction(Function &F) {
> +  // Don't support changing the function arguments. Illegal function arguments
> +  // should not be generated by clang.
> +  for (const Argument &Arg : F.args())
> +    if (shouldConvert(&Arg))
> +      report_fatal_error("Function " + F.getName() +
> +                         " has illegal integer argument");
> +
> +  // TODO(jfb) This should loop to handle nested forward PHIs.
> +
> +  ConversionState State;
> +  DataLayout DL(F.getParent());
> +  bool Modified = false;
> +  ReversePostOrderTraversal<Function *> RPOT(&F);
> +  for (ReversePostOrderTraversal<Function *>::rpo_iterator FI = RPOT.begin(),
> +                                                           FE = RPOT.end();
> +       FI != FE; ++FI) {
> +    BasicBlock *BB = *FI;
> +    for (Instruction &I : *BB) {
> +      // Only attempt to convert an instruction if its result or any of its
> +      // operands are illegal.
> +      bool ShouldConvert = shouldConvert(&I);
> +      for (Value *Op : I.operands())
> +        ShouldConvert |= shouldConvert(Op);
> +      if (ShouldConvert) {
> +        convertInstruction(&I, State, DL);
> +        Modified = true;
> +      }
> +    }
> +  }
> +  State.patchForwardPHIs();
> +  State.eraseReplacedInstructions();
> +  return Modified;
> +}
> +
> +FunctionPass *llvm::createExpandLargeIntegersPass() {
> +  return new ExpandLargeIntegers();
> +}
> diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp
> index 2bd070d..b8ed9fa 100644
> --- a/backend/src/llvm/llvm_gen_backend.hpp
> +++ b/backend/src/llvm/llvm_gen_backend.hpp
> @@ -48,6 +48,7 @@ namespace llvm {
>    void PhiSafeReplaceUses(llvm::Use *U, llvm::Value *NewVal);
>  
>    FunctionPass *createExpandConstantExprPass();
> +  FunctionPass *createExpandLargeIntegersPass();
>    // Copy debug information from Original to New, and return New.
>    template <typename T> T *CopyDebug(T *New, llvm::Instruction *Original) {
>      New->setDebugLoc(Original->getDebugLoc());
> diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
> index 6a37b4e..5701563 100644
> --- a/backend/src/llvm/llvm_to_gen.cpp
> +++ b/backend/src/llvm/llvm_to_gen.cpp
> @@ -270,20 +270,21 @@ namespace gbe
>      passes.add(createScalarReplAggregatesPass(64, true, -1, -1, 64));
>      passes.add(createLoadStoreOptimizationPass());
>      passes.add(createConstantPropagationPass());
> -    passes.add(createLowerSwitchPass());
>      passes.add(createPromoteMemoryToRegisterPass());
>      if(optLevel > 0)
> -      passes.add(createGVNPass());                  // Remove redundancies
> +      passes.add(createGVNPass());                 // Remove redundancies
>      passes.add(createPrintfParserPass());
> -    passes.add(createExpandConstantExprPass());
> -    passes.add(createScalarizePass());        // Expand all vector ops
> -    passes.add(createLegalizePass());         // legalize large integer operation
> -    passes.add(createConstantPropagationPass()); // propagate constant after scalarize/legalize
> -    passes.add(createExpandConstantExprPass()); // constant prop may generate ConstantExpr
> -    passes.add(createRemoveGEPPass(unit)); // Constant prop may generate gep
> -    passes.add(createDeadInstEliminationPass());  // Remove simplified instructions
> +    passes.add(createExpandConstantExprPass());    // expand ConstantExpr
> +    passes.add(createScalarizePass());             // Expand all vector ops
> +    passes.add(createExpandLargeIntegersPass());   // legalize large integer operation
> +    passes.add(createInstructionCombiningPass());  // legalize will generate some silly instructions
> +    passes.add(createConstantPropagationPass());   // propagate constant after scalarize/legalize
> +    passes.add(createExpandConstantExprPass());    // constant prop may generate ConstantExpr
> +    passes.add(createRemoveGEPPass(unit));         // Constant prop may generate gep
> +    passes.add(createDeadInstEliminationPass());   // Remove simplified instructions
>      passes.add(createCFGSimplificationPass());     // Merge & remove BBs
> -    passes.add(createScalarizePass());        // Expand all vector ops
> +    passes.add(createLowerSwitchPass());           // simplify cfg will generate switch-case instruction
> +    passes.add(createScalarizePass());             // Expand all vector ops
>  
>      if(OCL_OUTPUT_CFG)
>        passes.add(createCFGPrinterPass());
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet