[Beignet] [PATCH 2/2] GBE: expand large integer instructions
Zhigang Gong
zhigang.gong at linux.intel.com
Mon Feb 2 22:43:25 PST 2015
The patch LGTM. So the llvm_legalize.cpp should be deleted now, right?
On Tue, Jan 27, 2015 at 04:38:25PM +0800, Ruiling Song wrote:
> The pass is also from PNaCl. which lower large integer into smaller ones.
> But different from previous legalize pass. It is easy to handle various
> instructions like load or phi with large integer operand.
>
> I find that instruction combine may make me hard to totally eliminate
> empty block. As CFG simplify pass may generate switch-case when preventing
> empty block. And Switch lower pass after CFG simplify may make empty block
> still exist in Gen IR. So, I temporarily disable the empty block check in backend.
>
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
> backend/src/CMakeLists.txt | 1 +
> backend/src/ir/context.cpp | 3 +-
> backend/src/llvm/ExpandLargeIntegers.cpp | 764 ++++++++++++++++++++++++++++++
> backend/src/llvm/llvm_gen_backend.hpp | 1 +
> backend/src/llvm/llvm_to_gen.cpp | 21 +-
> 5 files changed, 779 insertions(+), 11 deletions(-)
> create mode 100644 backend/src/llvm/ExpandLargeIntegers.cpp
>
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
> index 907d8a3..04f3918 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -86,6 +86,7 @@ set (GBE_SRC
> llvm/llvm_printf_parser.cpp
> llvm/ExpandConstantExpr.cpp
> llvm/ExpandUtils.cpp
> + llvm/ExpandLargeIntegers.cpp
> llvm/llvm_to_gen.cpp
> llvm/llvm_loadstore_optimization.cpp
> llvm/llvm_gen_backend.hpp
> diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp
> index 875a529..2412fe9 100644
> --- a/backend/src/ir/context.cpp
> +++ b/backend/src/ir/context.cpp
> @@ -76,7 +76,8 @@ namespace ir {
> // function
> lowerReturn(unit, fn->getName());
> // check if there is empty labels at first
> - fn->checkEmptyLabels();
> + // FIXME: I don't find a way to elimimate all empty blocks. temporary disable this check
> + //fn->checkEmptyLabels();
> // Properly order labels and compute the CFG, it's needed by FunctionArgumentLower
> fn->sortLabels();
> fn->computeCFG();
> diff --git a/backend/src/llvm/ExpandLargeIntegers.cpp b/backend/src/llvm/ExpandLargeIntegers.cpp
> new file mode 100644
> index 0000000..cbb2708
> --- /dev/null
> +++ b/backend/src/llvm/ExpandLargeIntegers.cpp
> @@ -0,0 +1,764 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> + *
> + */
> +
> +// Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign.
> +// All rights reserved.
> +//
> +// Developed by:
> +//
> +// LLVM Team
> +//
> +// University of Illinois at Urbana-Champaign
> +//
> +// http://llvm.org
> +//
> +// Permission is hereby granted, free of charge, to any person obtaining a copy of
> +// this software and associated documentation files (the "Software"), to deal with
> +// the Software without restriction, including without limitation the rights to
> +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
> +// of the Software, and to permit persons to whom the Software is furnished to do
> +// so, subject to the following conditions:
> +//
> +// * Redistributions of source code must retain the above copyright notice,
> +// this list of conditions and the following disclaimers.
> +//
> +// * Redistributions in binary form must reproduce the above copyright notice,
> +// this list of conditions and the following disclaimers in the
> +// documentation and/or other materials provided with the distribution.
> +//
> +// * Neither the names of the LLVM Team, University of Illinois at
> +// Urbana-Champaign, nor the names of its contributors may be used to
> +// endorse or promote products derived from this Software without specific
> +// prior written permission.
> +//
> +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
> +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> +// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
> +// SOFTWARE.
> +
> +//===- ExpandLargeIntegers.cpp - Expand illegal integers for PNaCl ABI ----===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License.
> +//
> +// A limited set of transformations to expand illegal-sized int types.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// Legal sizes for the purposes of expansion are anything 64 bits or less.
> +// Operations on large integers are split into operations on smaller-sized
> +// integers. The low parts should always be powers of 2, but the high parts may
> +// not be. A subsequent pass can promote those. For now this pass only intends
> +// to support the uses generated by clang, which is basically just for large
> +// bitfields.
> +//
> +// Limitations:
> +// 1) It can't change function signatures or global variables.
> +// 3) Doesn't support mul, div/rem, switch.
> +// 4) Doesn't handle arrays or structs (or GEPs) with illegal types.
> +// 5) Doesn't handle constant expressions (it also doesn't produce them, so it
> +// can run after ExpandConstantExpr).
> +//
> +// The PNaCl version does not handle bitcast between vector and large integer.
> +// So I develop the bitcast from/to vector logic.
> +// TODO: 1. When we do lshr/trunc, and we know it is cast from a vector, we can
> +// optimize it to extractElement.
> +// 2. OR x, 0 can be optimized as x. And x, 0 can be optimized as 0.
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/ADT/DenseMap.h"
> +#include "llvm/ADT/PostOrderIterator.h"
> +#include "llvm/ADT/STLExtras.h"
> +#include "llvm/ADT/SmallVector.h"
> +#include "llvm/IR/CFG.h"
> +#include "llvm/IR/DataLayout.h"
> +#include "llvm/IR/DerivedTypes.h"
> +#include "llvm/IR/Function.h"
> +#include "llvm/IR/IRBuilder.h"
> +#include "llvm/IR/Instructions.h"
> +#include "llvm/Pass.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/Support/MathExtras.h"
> +#include "llvm/Support/raw_ostream.h"
> +#include "llvm_gen_backend.hpp"
> +
> +using namespace llvm;
> +
> +#define DEBUG_TYPE "nacl-expand-ints"
> +
> +// Break instructions up into no larger than 64-bit chunks.
> +static const unsigned kChunkBits = 64;
> +static const unsigned kChunkBytes = kChunkBits / CHAR_BIT;
> +
> +namespace {
> +class ExpandLargeIntegers : public FunctionPass {
> +public:
> + static char ID;
> + ExpandLargeIntegers() : FunctionPass(ID) {
> + }
> + bool runOnFunction(Function &F) override;
> +};
> +
> +template <typename T> struct LoHiPair {
> + T Lo, Hi;
> + LoHiPair() : Lo(), Hi() {}
> + LoHiPair(T Lo, T Hi) : Lo(Lo), Hi(Hi) {}
> +};
> +typedef LoHiPair<IntegerType *> TypePair;
> +typedef LoHiPair<Value *> ValuePair;
> +typedef LoHiPair<unsigned> AlignPair;
> +
> +struct VectorElement {
> + Value *parent;
> + unsigned childId;
> + VectorElement() : parent(NULL), childId(0) {}
> + VectorElement(Value *p, unsigned i) : parent(p), childId(i) {}
> +};
> +
> +// Information needed to patch a phi node which forward-references a value.
> +struct ForwardPHI {
> + Value *Val;
> + PHINode *Lo, *Hi;
> + unsigned ValueNumber;
> + ForwardPHI(Value *Val, PHINode *Lo, PHINode *Hi, unsigned ValueNumber)
> + : Val(Val), Lo(Lo), Hi(Hi), ValueNumber(ValueNumber) {}
> +};
> +}
> +
> +char ExpandLargeIntegers::ID = 0;
> +
> +static bool isLegalBitSize(unsigned Bits) {
> + assert(Bits && "Can't have zero-size integers");
> + return Bits <= kChunkBits;
> +}
> +
> +static TypePair getExpandedIntTypes(Type *Ty) {
> + unsigned BitWidth = Ty->getIntegerBitWidth();
> + assert(!isLegalBitSize(BitWidth));
> + return TypePair(IntegerType::get(Ty->getContext(), kChunkBits),
> + IntegerType::get(Ty->getContext(), BitWidth - kChunkBits));
> +}
> +
> +// Return true if Val is an int which should be converted.
> +static bool shouldConvert(const Value *Val) {
> + Type *Ty = Val->getType();
> + if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
> + return !isLegalBitSize(ITy->getBitWidth());
> + return false;
> +}
> +
> +// Return a pair of constants expanded from C.
> +static ValuePair expandConstant(Constant *C) {
> + assert(shouldConvert(C));
> + TypePair ExpandedTypes = getExpandedIntTypes(C->getType());
> + if (isa<UndefValue>(C)) {
> + return ValuePair(UndefValue::get(ExpandedTypes.Lo),
> + UndefValue::get(ExpandedTypes.Hi));
> + } else if (ConstantInt *CInt = dyn_cast<ConstantInt>(C)) {
> + Constant *ShiftAmt = ConstantInt::get(
> + CInt->getType(), ExpandedTypes.Lo->getBitWidth(), false);
> + return ValuePair(
> + ConstantExpr::getTrunc(CInt, ExpandedTypes.Lo),
> + ConstantExpr::getTrunc(ConstantExpr::getLShr(CInt, ShiftAmt),
> + ExpandedTypes.Hi));
> + }
> + errs() << "Value: " << *C << "\n";
> + report_fatal_error("Unexpected constant value");
> +}
> +
> +template <typename T>
> +static AlignPair getAlign(const DataLayout &DL, T *I, Type *PrefAlignTy) {
> + unsigned LoAlign = I->getAlignment();
> + if (LoAlign == 0)
> + LoAlign = DL.getPrefTypeAlignment(PrefAlignTy);
> + unsigned HiAlign = MinAlign(LoAlign, kChunkBytes);
> + return AlignPair(LoAlign, HiAlign);
> +}
> +
> +namespace {
> +// Holds the state for converting/replacing values. We visit instructions in
> +// reverse post-order, phis are therefore the only instructions which can be
> +// visited before the value they use.
> +class ConversionState {
> +public:
> + // Return the expanded values for Val.
> + ValuePair getConverted(Value *Val) {
> + assert(shouldConvert(Val));
> + // Directly convert constants.
> + if (Constant *C = dyn_cast<Constant>(Val))
> + return expandConstant(C);
> + if (RewrittenIllegals.count(Val)) {
> + ValuePair Found = RewrittenIllegals[Val];
> + if (RewrittenLegals.count(Found.Lo))
> + Found.Lo = RewrittenLegals[Found.Lo];
> + if (RewrittenLegals.count(Found.Hi))
> + Found.Hi = RewrittenLegals[Found.Hi];
> + return Found;
> + }
> + errs() << "Value: " << *Val << "\n";
> + report_fatal_error("Expanded value not found in map");
> + }
> +
> + // Returns whether a converted value has been recorded. This is only useful
> + // for phi instructions: they can be encountered before the incoming
> + // instruction, whereas RPO order guarantees that other instructions always
> + // use converted values.
> + bool hasConverted(Value *Val) {
> + assert(shouldConvert(Val));
> + return dyn_cast<Constant>(Val) || RewrittenIllegals.count(Val);
> + }
> +
> + // Record a forward phi, temporarily setting it to use Undef. This will be
> + // patched up at the end of RPO.
> + ValuePair recordForwardPHI(Value *Val, PHINode *Lo, PHINode *Hi,
> + unsigned ValueNumber) {
> + DEBUG(dbgs() << "\tRecording as forward PHI\n");
> + ForwardPHIs.push_back(ForwardPHI(Val, Lo, Hi, ValueNumber));
> + return ValuePair(UndefValue::get(Lo->getType()),
> + UndefValue::get(Hi->getType()));
> + }
> +
> + void recordConverted(Instruction *From, const ValuePair &To) {
> + DEBUG(dbgs() << "\tTo: " << *To.Lo << "\n");
> + DEBUG(dbgs() << "\tAnd: " << *To.Hi << "\n");
> + ToErase.push_back(From);
> + RewrittenIllegals[From] = To;
> + }
> +
> + // Replace the uses of From with To, give From's name to To, and mark To for
> + // deletion.
> + void recordConverted(Instruction *From, Value *To) {
> + assert(!shouldConvert(From));
> + DEBUG(dbgs() << "\tTo: " << *To << "\n");
> + ToErase.push_back(From);
> + // From does not produce an illegal value, update its users in place.
> + From->replaceAllUsesWith(To);
> + To->takeName(From);
> + RewrittenLegals[From] = To;
> + }
> +
> + void patchForwardPHIs() {
> + DEBUG(if (!ForwardPHIs.empty()) dbgs() << "Patching forward PHIs:\n");
> + for (ForwardPHI &F : ForwardPHIs) {
> + ValuePair Ops = getConverted(F.Val);
> + F.Lo->setIncomingValue(F.ValueNumber, Ops.Lo);
> + F.Hi->setIncomingValue(F.ValueNumber, Ops.Hi);
> + DEBUG(dbgs() << "\t" << *F.Lo << "\n\t" << *F.Hi << "\n");
> + }
> + }
> +
> + void eraseReplacedInstructions() {
> + for (Instruction *I : ToErase)
> + I->dropAllReferences();
> +
> + for (Instruction *I : ToErase)
> + I->eraseFromParent();
> + }
> +
> + void addEraseCandidate(Instruction *c) {
> + ToErase.push_back(c);
> + }
> +
> + void appendElement(Value *v, Value *e) {
> + if (ExtractElement.count(v) == 0) {
> + SmallVector<Value *, 16> tmp;
> + tmp.push_back(e);
> + ExtractElement[v] = tmp;
> + } else
> + ExtractElement[v].push_back(e);
> + }
> +
> + Value *getElement(Value *v, unsigned id) {
> + return (ExtractElement[v])[id];
> + }
> + VectorElement &getVectorMap(Value *child) {
> + return VectorIllegals[child];
> + }
> +
> + bool convertedVector(Value *vector) {
> + return VectorIllegals.count(vector) > 0 ? true : false;
> + }
> +
> + void recordVectorMap(Value *child, VectorElement elem) {
> + VectorIllegals[child] = elem;
> + }
> +
> +private:
> + // Maps illegal values to their new converted lo/hi values.
> + DenseMap<Value *, ValuePair> RewrittenIllegals;
> + // Maps legal values to their new converted value.
> + DenseMap<Value *, Value *> RewrittenLegals;
> + // Illegal values which have already been converted, will be erased.
> + SmallVector<Instruction *, 32> ToErase;
> + // PHIs which were encountered but had forward references. They need to get
> + // patched up after RPO traversal.
> + SmallVector<ForwardPHI, 32> ForwardPHIs;
> + // helpers to solve bitcasting from vector to illegal integer types
> + // Maps a Value to its original Vector and elemId
> + DenseMap<Value *, VectorElement> VectorIllegals;
> + // cache the ExtractElement Values
> + DenseMap<Value *, SmallVector<Value *, 16>> ExtractElement;
> +};
> +} // Anonymous namespace
> +
> +static Value *buildVectorOrScalar(ConversionState &State, IRBuilder<> &IRB, SmallVector<Value *, 16> Elements) {
> + assert(!Elements.empty());
> + Type *IntTy = IntegerType::get(IRB.getContext(), 32);
> +
> + if (Elements.size() > 1) {
> + Value * vec = NULL;
> + unsigned ElemNo = Elements.size();
> + Type *ElemTy = Elements[0]->getType();
> + bool KeepInsert = isLegalBitSize(ElemTy->getIntegerBitWidth() * ElemNo);
> + for (unsigned i = 0; i < ElemNo; ++i) {
> + Value *tmp = vec ? vec : UndefValue::get(VectorType::get(ElemTy, ElemNo));
> + Value *idx = ConstantInt::get(IntTy, i);
> + vec = IRB.CreateInsertElement(tmp, Elements[i], idx);
> + if (!KeepInsert) {
> + State.addEraseCandidate(cast<Instruction>(vec));
> + }
> + }
> + return vec;
> + } else {
> + return Elements[0];
> + }
> +}
> +
> +void getSplitedValue(ConversionState &State, Value *Val, SmallVector<Value *, 16> &Result) {
> + while (shouldConvert(Val)) {
> + ValuePair Convert = State.getConverted(Val);
> + Result.push_back(Convert.Lo);
> + Val = Convert.Hi;
> + }
> + Result.push_back(Val);
> +}
> +
> +static void convertInstruction(Instruction *Inst, ConversionState &State,
> + const DataLayout &DL) {
> + DEBUG(dbgs() << "Expanding Large Integer: " << *Inst << "\n");
> + // Set the insert point *after* Inst, so that any instructions inserted here
> + // will be visited again. That allows iterative expansion of types > i128.
> + BasicBlock::iterator InsertPos(Inst);
> + IRBuilder<> IRB(++InsertPos);
> + StringRef Name = Inst->getName();
> +
> + if (PHINode *Phi = dyn_cast<PHINode>(Inst)) {
> + unsigned N = Phi->getNumIncomingValues();
> + TypePair OpTys = getExpandedIntTypes(Phi->getIncomingValue(0)->getType());
> + PHINode *Lo = IRB.CreatePHI(OpTys.Lo, N, Twine(Name + ".lo"));
> + PHINode *Hi = IRB.CreatePHI(OpTys.Hi, N, Twine(Name + ".hi"));
> + for (unsigned I = 0; I != N; ++I) {
> + Value *InVal = Phi->getIncomingValue(I);
> + BasicBlock *InBB = Phi->getIncomingBlock(I);
> + // If the value hasn't already been converted then this is a
> + // forward-reference PHI which needs to be patched up after RPO traversal.
> + ValuePair Ops = State.hasConverted(InVal)
> + ? State.getConverted(InVal)
> + : State.recordForwardPHI(InVal, Lo, Hi, I);
> + Lo->addIncoming(Ops.Lo, InBB);
> + Hi->addIncoming(Ops.Hi, InBB);
> + }
> + State.recordConverted(Phi, ValuePair(Lo, Hi));
> +
> + } else if (ZExtInst *ZExt = dyn_cast<ZExtInst>(Inst)) {
> + Value *Operand = ZExt->getOperand(0);
> + Type *OpTy = Operand->getType();
> + TypePair Tys = getExpandedIntTypes(Inst->getType());
> + Value *Lo, *Hi;
> + if (OpTy->getIntegerBitWidth() <= kChunkBits) {
> + Lo = IRB.CreateZExt(Operand, Tys.Lo, Twine(Name, ".lo"));
> + Hi = ConstantInt::get(Tys.Hi, 0);
> + } else {
> + ValuePair Ops = State.getConverted(Operand);
> + Lo = Ops.Lo;
> + Hi = IRB.CreateZExt(Ops.Hi, Tys.Hi, Twine(Name, ".hi"));
> + }
> + State.recordConverted(ZExt, ValuePair(Lo, Hi));
> +
> + } else if (TruncInst *Trunc = dyn_cast<TruncInst>(Inst)) {
> + Value *Operand = Trunc->getOperand(0);
> + assert(shouldConvert(Operand) && "TruncInst is expandable but not its op");
> + TypePair OpTys = getExpandedIntTypes(Operand->getType());
> + ValuePair Ops = State.getConverted(Operand);
> + if (!shouldConvert(Inst)) {
> + Value *NewInst = IRB.CreateTrunc(Ops.Lo, Trunc->getType(), Name);
> + State.recordConverted(Trunc, NewInst);
> + } else {
> + TypePair Tys = getExpandedIntTypes(Trunc->getType());
> + assert(Tys.Lo == OpTys.Lo);
> + Value *Lo = Ops.Lo;
> + Value *Hi = IRB.CreateTrunc(Ops.Hi, Tys.Hi, Twine(Name, ".hi"));
> + State.recordConverted(Trunc, ValuePair(Lo, Hi));
> + }
> +
> + } else if (BitCastInst *Cast = dyn_cast<BitCastInst>(Inst)) {
> + Value *Operand = Cast->getOperand(0);
> + bool DstVec = Inst->getType()->isVectorTy();
> +
> + Type *IntTy = IntegerType::get(Cast->getContext(), 32);
> + if (DstVec) {
> + // integer to vector, get all children and bitcast
> + SmallVector<Value *, 16> Split;
> + getSplitedValue(State, Operand, Split);
> +
> + Value *vec = NULL;
> + unsigned ElemNo = Split.size();
> + Type *ElemTy = Split[0]->getType();
> + for (unsigned i = 0; i < ElemNo; ++i) {
> + Value *tmp = vec ? vec : UndefValue::get(VectorType::get(ElemTy, ElemNo));
> + Value *idx = ConstantInt::get(IntTy, i);
> + vec = IRB.CreateInsertElement(tmp, Split[i], idx);
> + }
> + if (vec->getType() != Cast->getType())
> + vec = IRB.CreateBitCast(vec, Cast->getType());
> + State.recordConverted(Cast, vec);
> + } else {
> + // vector to integer
> + assert(Operand->getType()->isVectorTy());
> + VectorType *VecTy = cast<VectorType>(Operand->getType());
> + Type *LargeTy = Inst->getType();
> + Type *ElemTy = VecTy->getElementType();
> + unsigned ElemNo = VecTy->getNumElements();
> + Value * VectorRoot = NULL;
> + unsigned ChildIndex = 0;
> +
> + if (State.convertedVector(Operand)) {
> + VectorElement VE = State.getVectorMap(Operand);
> + VectorRoot = VE.parent;
> + ChildIndex = VE.childId;
> + } else {
> + for (unsigned i =0; i < ElemNo; i++)
> + State.appendElement(Operand,
> + IRB.CreateExtractElement(Operand, ConstantInt::get(IntTy, i))
> + );
> + VectorRoot = Operand;
> + }
> +
> + TypePair OpTys = getExpandedIntTypes(LargeTy);
> + Value *Lo, *Hi;
> + unsigned LowNo = OpTys.Lo->getIntegerBitWidth() / ElemTy->getIntegerBitWidth();
> + unsigned HighNo = OpTys.Hi->getIntegerBitWidth() / ElemTy->getIntegerBitWidth();
> +
> + SmallVector<Value *, 16> LoElems;
> + for (unsigned i = 0; i < LowNo; ++i)
> + LoElems.push_back(State.getElement(VectorRoot, i+ChildIndex));
> +
> + Lo = IRB.CreateBitCast(buildVectorOrScalar(State, IRB, LoElems), OpTys.Lo, Twine(Name, ".lo"));
> +
> + SmallVector<Value *, 16> HiElem;
> + for (unsigned i = 0; i < HighNo; ++i)
> + HiElem.push_back(State.getElement(VectorRoot, i+LowNo+ChildIndex));
> +
> + Value *NewVec = buildVectorOrScalar(State, IRB, HiElem);
> + Hi = IRB.CreateBitCast(NewVec, OpTys.Hi);
> +
> + State.recordVectorMap(NewVec, VectorElement(VectorRoot, LowNo + ChildIndex));
> + State.recordConverted(Cast, ValuePair(Lo, Hi));
> + }
> +
> + } else if (BinaryOperator *Binop = dyn_cast<BinaryOperator>(Inst)) {
> + ValuePair Lhs = State.getConverted(Binop->getOperand(0));
> + ValuePair Rhs = State.getConverted(Binop->getOperand(1));
> + TypePair Tys = getExpandedIntTypes(Binop->getType());
> + Instruction::BinaryOps Op = Binop->getOpcode();
> + switch (Op) {
> + case Instruction::And:
> + case Instruction::Or:
> + case Instruction::Xor: {
> + Value *Lo = IRB.CreateBinOp(Op, Lhs.Lo, Rhs.Lo, Twine(Name, ".lo"));
> + Value *Hi = IRB.CreateBinOp(Op, Lhs.Hi, Rhs.Hi, Twine(Name, ".hi"));
> + State.recordConverted(Binop, ValuePair(Lo, Hi));
> + break;
> + }
> +
> + case Instruction::Shl: {
> + ConstantInt *ShlAmount = dyn_cast<ConstantInt>(Rhs.Lo);
> + // TODO(dschuff): Expansion of variable-sized shifts isn't supported
> + // because the behavior depends on whether the shift amount is less than
> + // the size of the low part of the expanded type, and I haven't yet
> + // figured out a way to do it for variable-sized shifts without splitting
> + // the basic block. I don't believe it's actually necessary for
> + // bitfields. Likewise for LShr below.
> + if (!ShlAmount) {
> + errs() << "Shift: " << *Binop << "\n";
> + report_fatal_error("Expansion of variable-sized shifts of > 64-bit-"
> + "wide values is not supported");
> + }
> + unsigned ShiftAmount = ShlAmount->getZExtValue();
> + if (ShiftAmount >= Binop->getType()->getIntegerBitWidth())
> + ShiftAmount = 0; // Undefined behavior.
> +
> + unsigned HiBits = Tys.Hi->getIntegerBitWidth();
> + // |<------------Hi---------->|<-------Lo------>|
> + // | | |
> + // +--------+--------+--------+--------+--------+
> + // |abcdefghijklmnopqrstuvwxyz|ABCDEFGHIJKLMNOPQ|
> + // +--------+--------+--------+--------+--------+
> + // Possible shifts:
> + // |efghijklmnopqrstuvwxyzABCD|EFGHIJKLMNOPQ0000| Some Lo into Hi.
> + // |vwxyzABCDEFGHIJKLMNOPQ0000|00000000000000000| Lo is 0, keep some Hi.
> + // |DEFGHIJKLMNOPQ000000000000|00000000000000000| Lo is 0, no Hi left.
> + Value *Lo, *Hi;
> + if (ShiftAmount < kChunkBits) {
> + Lo = IRB.CreateShl(Lhs.Lo, ShiftAmount, Twine(Name, ".lo"));
> + Hi = IRB.CreateZExtOrTrunc(IRB.CreateLShr(Lhs.Lo,
> + kChunkBits - ShiftAmount,
> + Twine(Name, ".lo.shr")),
> + Tys.Hi, Twine(Name, ".lo.ext"));
> + } else {
> + Lo = ConstantInt::get(Tys.Lo, 0);
> + if (ShiftAmount == kChunkBits) {
> + // Hi will be from Lo
> + Hi = IRB.CreateZExtOrTrunc(Lhs.Lo, Tys.Hi, Twine(Name, ".lo.ext"));
> + } else {
> + Hi = IRB.CreateShl(
> + IRB.CreateZExtOrTrunc(Lhs.Lo, Tys.Hi, Twine(Name, ".lo.ext")),
> + ShiftAmount - kChunkBits, Twine(Name, ".lo.shl"));
> + }
> + }
> + if (ShiftAmount < HiBits)
> + Hi = IRB.CreateOr(
> + Hi, IRB.CreateShl(Lhs.Hi, ShiftAmount, Twine(Name, ".hi.shl")),
> + Twine(Name, ".or"));
> + State.recordConverted(Binop, ValuePair(Lo, Hi));
> + break;
> + }
> +
> + case Instruction::AShr:
> + case Instruction::LShr: {
> + ConstantInt *ShrAmount = dyn_cast<ConstantInt>(Rhs.Lo);
> + // TODO(dschuff): Expansion of variable-sized shifts isn't supported
> + // because the behavior depends on whether the shift amount is less than
> + // the size of the low part of the expanded type, and I haven't yet
> + // figured out a way to do it for variable-sized shifts without splitting
> + // the basic block. I don't believe it's actually necessary for bitfields.
> + if (!ShrAmount) {
> + errs() << "Shift: " << *Binop << "\n";
> + report_fatal_error("Expansion of variable-sized shifts of > 64-bit-"
> + "wide values is not supported");
> + }
> + bool IsArith = Op == Instruction::AShr;
> + unsigned ShiftAmount = ShrAmount->getZExtValue();
> + if (ShiftAmount >= Binop->getType()->getIntegerBitWidth())
> + ShiftAmount = 0; // Undefined behavior.
> + unsigned HiBitWidth = Tys.Hi->getIntegerBitWidth();
> + // |<--Hi-->|<-------Lo------>|
> + // | | |
> + // +--------+--------+--------+
> + // |abcdefgh|ABCDEFGHIJKLMNOPQ|
> + // +--------+--------+--------+
> + // Possible shifts (0 is sign when doing AShr):
> + // |0000abcd|defgABCDEFGHIJKLM| Some Hi into Lo.
> + // |00000000|00abcdefgABCDEFGH| Hi is 0, keep some Lo.
> + // |00000000|000000000000abcde| Hi is 0, no Lo left.
> + Value *Lo, *Hi;
> + if (ShiftAmount == 0) {
> + Lo = Lhs.Lo; Hi = Lhs.Hi;
> + } else {
> + if (ShiftAmount < kChunkBits) {
> + Lo = IRB.CreateShl(
> + IsArith
> + ? IRB.CreateSExtOrTrunc(Lhs.Hi, Tys.Lo, Twine(Name, ".hi.ext"))
> + : IRB.CreateZExtOrTrunc(Lhs.Hi, Tys.Lo, Twine(Name, ".hi.ext")),
> + kChunkBits - ShiftAmount, Twine(Name, ".hi.shl"));
> +
> + Lo = IRB.CreateOr(
> + Lo, IRB.CreateLShr(Lhs.Lo, ShiftAmount, Twine(Name, ".lo.shr")),
> + Twine(Name, ".lo"));
> + } else if (ShiftAmount == kChunkBits) {
> + Lo = IsArith
> + ? IRB.CreateSExtOrTrunc(Lhs.Hi, Tys.Lo, Twine(Name, ".hi.ext"))
> + : IRB.CreateZExtOrTrunc(Lhs.Hi, Tys.Lo, Twine(Name, ".hi.ext"));
> +
> + } else {
> + Lo = IRB.CreateBinOp(Op, Lhs.Hi,
> + ConstantInt::get(Tys.Hi, ShiftAmount - kChunkBits),
> + Twine(Name, ".hi.shr"));
> + Lo = IsArith
> + ? IRB.CreateSExtOrTrunc(Lo, Tys.Lo, Twine(Name, ".lo.ext"))
> + : IRB.CreateZExtOrTrunc(Lo, Tys.Lo, Twine(Name, ".lo.ext"));
> + }
> + if (ShiftAmount < HiBitWidth) {
> + Hi = IRB.CreateBinOp(Op, Lhs.Hi, ConstantInt::get(Tys.Hi, ShiftAmount),
> + Twine(Name, ".hi"));
> + } else {
> + Hi = IsArith
> + ? IRB.CreateAShr(Lhs.Hi, HiBitWidth - 1, Twine(Name, ".hi"))
> + : ConstantInt::get(Tys.Hi, 0);
> + }
> + }
> + State.recordConverted(Binop, ValuePair(Lo, Hi));
> + break;
> + }
> +
> + case Instruction::Add:
> + case Instruction::Sub: {
> + Value *Lo, *Hi;
> + if (Op == Instruction::Add) {
> + Value *Limit = IRB.CreateSelect(
> + IRB.CreateICmpULT(Lhs.Lo, Rhs.Lo, Twine(Name, ".cmp")), Rhs.Lo,
> + Lhs.Lo, Twine(Name, ".limit"));
> + // Don't propagate NUW/NSW to the lo operation: it can overflow.
> + Lo = IRB.CreateBinOp(Op, Lhs.Lo, Rhs.Lo, Twine(Name, ".lo"));
> + Value *Carry = IRB.CreateZExt(
> + IRB.CreateICmpULT(Lo, Limit, Twine(Name, ".overflowed")), Tys.Hi,
> + Twine(Name, ".carry"));
> + // TODO(jfb) The hi operation could be tagged with NUW/NSW.
> + Hi = IRB.CreateBinOp(
> + Op, IRB.CreateBinOp(Op, Lhs.Hi, Rhs.Hi, Twine(Name, ".hi")), Carry,
> + Twine(Name, ".carried"));
> + } else {
> + Value *Borrowed = IRB.CreateSExt(
> + IRB.CreateICmpULT(Lhs.Lo, Rhs.Lo, Twine(Name, ".borrow")), Tys.Hi,
> + Twine(Name, ".borrowing"));
> + Lo = IRB.CreateBinOp(Op, Lhs.Lo, Rhs.Lo, Twine(Name, ".lo"));
> + Hi = IRB.CreateBinOp(
> + Instruction::Add,
> + IRB.CreateBinOp(Op, Lhs.Hi, Rhs.Hi, Twine(Name, ".hi")), Borrowed,
> + Twine(Name, ".borrowed"));
> + }
> + State.recordConverted(Binop, ValuePair(Lo, Hi));
> + break;
> + }
> +
> + default:
> + errs() << "Operation: " << *Binop << "\n";
> + report_fatal_error("Unhandled BinaryOperator type in "
> + "ExpandLargeIntegers");
> + }
> +
> + } else if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
> + Value *Op = Load->getPointerOperand();
> + TypePair Tys = getExpandedIntTypes(Load->getType());
> + AlignPair Align = getAlign(DL, Load, Load->getType());
> + Value *Loty = IRB.CreateBitCast(Op, Tys.Lo->getPointerTo(),
> + Twine(Op->getName(), ".loty"));
> + Value *Lo =
> + IRB.CreateAlignedLoad(Loty, Align.Lo, Twine(Load->getName(), ".lo"));
> + Value *HiAddr =
> + IRB.CreateConstGEP1_32(Loty, 1, Twine(Op->getName(), ".hi.gep"));
> + Value *HiTy = IRB.CreateBitCast(HiAddr, Tys.Hi->getPointerTo(),
> + Twine(Op->getName(), ".hity"));
> + Value *Hi =
> + IRB.CreateAlignedLoad(HiTy, Align.Hi, Twine(Load->getName(), ".hi"));
> + State.recordConverted(Load, ValuePair(Lo, Hi));
> +
> + } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
> + Value *Ptr = Store->getPointerOperand();
> + TypePair Tys = getExpandedIntTypes(Store->getValueOperand()->getType());
> + ValuePair StoreVals = State.getConverted(Store->getValueOperand());
> + AlignPair Align = getAlign(DL, Store, Store->getValueOperand()->getType());
> + Value *Loty = IRB.CreateBitCast(Ptr, Tys.Lo->getPointerTo(),
> + Twine(Ptr->getName(), ".loty"));
> + Value *Lo = IRB.CreateAlignedStore(StoreVals.Lo, Loty, Align.Lo);
> + Value *HiAddr =
> + IRB.CreateConstGEP1_32(Loty, 1, Twine(Ptr->getName(), ".hi.gep"));
> + Value *HiTy = IRB.CreateBitCast(HiAddr, Tys.Hi->getPointerTo(),
> + Twine(Ptr->getName(), ".hity"));
> + Value *Hi = IRB.CreateAlignedStore(StoreVals.Hi, HiTy, Align.Hi);
> + State.recordConverted(Store, ValuePair(Lo, Hi));
> +
> + } else if (ICmpInst *Icmp = dyn_cast<ICmpInst>(Inst)) {
> + ValuePair Lhs = State.getConverted(Icmp->getOperand(0));
> + ValuePair Rhs = State.getConverted(Icmp->getOperand(1));
> + switch (Icmp->getPredicate()) {
> + case CmpInst::ICMP_EQ:
> + case CmpInst::ICMP_NE: {
> + Value *Lo = IRB.CreateICmp(Icmp->getUnsignedPredicate(), Lhs.Lo, Rhs.Lo,
> + Twine(Name, ".lo"));
> + Value *Hi = IRB.CreateICmp(Icmp->getUnsignedPredicate(), Lhs.Hi, Rhs.Hi,
> + Twine(Name, ".hi"));
> + Value *Result =
> + IRB.CreateBinOp(Instruction::And, Lo, Hi, Twine(Name, ".result"));
> + State.recordConverted(Icmp, Result);
> + break;
> + }
> +
> + // TODO(jfb): Implement the following cases.
> + case CmpInst::ICMP_UGT:
> + case CmpInst::ICMP_UGE:
> + case CmpInst::ICMP_ULT:
> + case CmpInst::ICMP_ULE:
> + case CmpInst::ICMP_SGT:
> + case CmpInst::ICMP_SGE:
> + case CmpInst::ICMP_SLT:
> + case CmpInst::ICMP_SLE:
> + errs() << "Comparison: " << *Icmp << "\n";
> + report_fatal_error("Comparisons other than equality not supported for"
> + "integer types larger than 64 bit");
> + default:
> + llvm_unreachable("Invalid integer comparison");
> + }
> +
> + } else if (SelectInst *Select = dyn_cast<SelectInst>(Inst)) {
> + Value *Cond = Select->getCondition();
> + ValuePair True = State.getConverted(Select->getTrueValue());
> + ValuePair False = State.getConverted(Select->getFalseValue());
> + Value *Lo = IRB.CreateSelect(Cond, True.Lo, False.Lo, Twine(Name, ".lo"));
> + Value *Hi = IRB.CreateSelect(Cond, True.Hi, False.Hi, Twine(Name, ".hi"));
> + State.recordConverted(Select, ValuePair(Lo, Hi));
> +
> + } else {
> + errs() << "Instruction: " << *Inst << "\n";
> + report_fatal_error("Unhandle large integer expansion");
> + }
> +}
> +
> +bool ExpandLargeIntegers::runOnFunction(Function &F) {
> + // Don't support changing the function arguments. Illegal function arguments
> + // should not be generated by clang.
> + for (const Argument &Arg : F.args())
> + if (shouldConvert(&Arg))
> + report_fatal_error("Function " + F.getName() +
> + " has illegal integer argument");
> +
> + // TODO(jfb) This should loop to handle nested forward PHIs.
> +
> + ConversionState State;
> + DataLayout DL(F.getParent());
> + bool Modified = false;
> + ReversePostOrderTraversal<Function *> RPOT(&F);
> + for (ReversePostOrderTraversal<Function *>::rpo_iterator FI = RPOT.begin(),
> + FE = RPOT.end();
> + FI != FE; ++FI) {
> + BasicBlock *BB = *FI;
> + for (Instruction &I : *BB) {
> + // Only attempt to convert an instruction if its result or any of its
> + // operands are illegal.
> + bool ShouldConvert = shouldConvert(&I);
> + for (Value *Op : I.operands())
> + ShouldConvert |= shouldConvert(Op);
> + if (ShouldConvert) {
> + convertInstruction(&I, State, DL);
> + Modified = true;
> + }
> + }
> + }
> + State.patchForwardPHIs();
> + State.eraseReplacedInstructions();
> + return Modified;
> +}
> +
> +FunctionPass *llvm::createExpandLargeIntegersPass() {
> + return new ExpandLargeIntegers();
> +}
> diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp
> index 2bd070d..b8ed9fa 100644
> --- a/backend/src/llvm/llvm_gen_backend.hpp
> +++ b/backend/src/llvm/llvm_gen_backend.hpp
> @@ -48,6 +48,7 @@ namespace llvm {
> void PhiSafeReplaceUses(llvm::Use *U, llvm::Value *NewVal);
>
> FunctionPass *createExpandConstantExprPass();
> + FunctionPass *createExpandLargeIntegersPass();
> // Copy debug information from Original to New, and return New.
> template <typename T> T *CopyDebug(T *New, llvm::Instruction *Original) {
> New->setDebugLoc(Original->getDebugLoc());
> diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
> index 6a37b4e..5701563 100644
> --- a/backend/src/llvm/llvm_to_gen.cpp
> +++ b/backend/src/llvm/llvm_to_gen.cpp
> @@ -270,20 +270,21 @@ namespace gbe
> passes.add(createScalarReplAggregatesPass(64, true, -1, -1, 64));
> passes.add(createLoadStoreOptimizationPass());
> passes.add(createConstantPropagationPass());
> - passes.add(createLowerSwitchPass());
> passes.add(createPromoteMemoryToRegisterPass());
> if(optLevel > 0)
> - passes.add(createGVNPass()); // Remove redundancies
> + passes.add(createGVNPass()); // Remove redundancies
> passes.add(createPrintfParserPass());
> - passes.add(createExpandConstantExprPass());
> - passes.add(createScalarizePass()); // Expand all vector ops
> - passes.add(createLegalizePass()); // legalize large integer operation
> - passes.add(createConstantPropagationPass()); // propagate constant after scalarize/legalize
> - passes.add(createExpandConstantExprPass()); // constant prop may generate ConstantExpr
> - passes.add(createRemoveGEPPass(unit)); // Constant prop may generate gep
> - passes.add(createDeadInstEliminationPass()); // Remove simplified instructions
> + passes.add(createExpandConstantExprPass()); // expand ConstantExpr
> + passes.add(createScalarizePass()); // Expand all vector ops
> + passes.add(createExpandLargeIntegersPass()); // legalize large integer operation
> + passes.add(createInstructionCombiningPass()); // legalize will generate some silly instructions
> + passes.add(createConstantPropagationPass()); // propagate constant after scalarize/legalize
> + passes.add(createExpandConstantExprPass()); // constant prop may generate ConstantExpr
> + passes.add(createRemoveGEPPass(unit)); // Constant prop may generate gep
> + passes.add(createDeadInstEliminationPass()); // Remove simplified instructions
> passes.add(createCFGSimplificationPass()); // Merge & remove BBs
> - passes.add(createScalarizePass()); // Expand all vector ops
> + passes.add(createLowerSwitchPass()); // simplify cfg will generate switch-case instruction
> + passes.add(createScalarizePass()); // Expand all vector ops
>
> if(OCL_OUTPUT_CFG)
> passes.add(createCFGPrinterPass());
> --
> 1.7.10.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list