[Beignet] [PATCH 2/3] Remove useless vector check after scalarize pass.

Thu May 16 22:57:47 PDT 2013

On Thu, May 16, 2013 at 12:36:34PM +0800, Yang Rong wrote:
> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  backend/src/llvm/llvm_gen_backend.cpp |  293 +++++++++++++--------------------
>  1 file changed, 119 insertions(+), 174 deletions(-)
> 
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 3855011..c99c1eb 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -222,30 +222,6 @@ namespace gbe
>      return ir::FAMILY_BOOL;
>    }
>  
> -  /*! Get number of element to process dealing either with a vector or a scalar
> -   *  value
> -   */
> -  static ir::Type getVectorInfo(const ir::Context &ctx, Type *llvmType, Value *value, uint32_t &elemNum, bool useUnsigned = false)
> -  {
> -    ir::Type type;
> -    if (llvmType->isVectorTy() == true) {
> -      VectorType *vectorType = cast<VectorType>(llvmType);
> -      Type *elementType = vectorType->getElementType();
> -      elemNum = vectorType->getNumElements();
> -      if (useUnsigned)
> -        type = getUnsignedType(ctx, elementType);
> -      else
> -        type = getType(ctx, elementType);
> -    } else {
> -      elemNum = 1;
> -      if (useUnsigned)
> -        type = getUnsignedType(ctx, llvmType);
> -      else
> -        type = getType(ctx, llvmType);
> -    }
> -    return type;
> -  }
> -
This function may still be useful for other special instructions, for example, the get_image_xxx functions.
The rest of the patch LGTM. Thanks.

>    /*! OCL to Gen-IR address type */
>    static INLINE ir::AddressSpace addressSpaceLLVMToGen(unsigned llvmMemSpace) {
>      switch (llvmMemSpace) {
> @@ -813,32 +789,29 @@ namespace gbe
>        PHINode *PN = cast<PHINode>(I);
>        Value *IV = PN->getIncomingValueForBlock(curr);
>        if (!isa<UndefValue>(IV)) {
> -        uint32_t elemNum;
>          Type *llvmType = PN->getType();
>          GBE_ASSERTM(llvmType != Type::getInt1Ty(llvmType->getContext()),
>            "TODO Boolean values cannot escape their definition basic block");
> -        const ir::Type type = getVectorInfo(ctx, llvmType, PN, elemNum);
> +        const ir::Type type = getType(ctx, llvmType);
>  
>          // Emit the MOV required by the PHI function. We do it simple and do not
>          // try to optimize them. A next data flow analysis pass on the Gen IR
>          // will remove them
> -        for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> -          Value *PHICopy = this->getPHICopy(PN);
> -          const ir::Register dst = this->getRegister(PHICopy, elemID);
> -          Constant *CP = dyn_cast<Constant>(IV);
> -          if (CP) {
> -            GBE_ASSERT(isa<GlobalValue>(CP) == false);
> -            ConstantVector *CPV = dyn_cast<ConstantVector>(CP);
> -            if (CPV && dyn_cast<ConstantVector>(CPV) &&
> -                isa<UndefValue>(extractConstantElem(CPV, elemID)))
> -              continue;
> -            const ir::ImmediateIndex immIndex = this->newImmediate(CP, elemID);
> -            const ir::Immediate imm = ctx.getImmediate(immIndex);
> -            ctx.LOADI(imm.type, dst, immIndex);
> -          } else if (regTranslator.valueExists(IV,elemID) || dyn_cast<Constant>(IV)) {
> -            const ir::Register src = this->getRegister(IV, elemID);
> -            ctx.MOV(type, dst, src);
> -          }
> +        Value *PHICopy = this->getPHICopy(PN);
> +        const ir::Register dst = this->getRegister(PHICopy);
> +        Constant *CP = dyn_cast<Constant>(IV);
> +        if (CP) {
> +          GBE_ASSERT(isa<GlobalValue>(CP) == false);
> +          ConstantVector *CPV = dyn_cast<ConstantVector>(CP);
> +          if (CPV && dyn_cast<ConstantVector>(CPV) &&
> +              isa<UndefValue>(extractConstantElem(CPV, 0)))
> +            continue;
> +          const ir::ImmediateIndex immIndex = this->newImmediate(CP);
> +          const ir::Immediate imm = ctx.getImmediate(immIndex);
> +          ctx.LOADI(imm.type, dst, immIndex);
> +        } else if (regTranslator.valueExists(IV,0) || dyn_cast<Constant>(IV)) {
> +          const ir::Register src = this->getRegister(IV);
> +          ctx.MOV(type, dst, src);
>          }
>        }
>      }
> @@ -1237,36 +1210,33 @@ namespace gbe
>  #endif /* GBE_DEBUG */
>  
>      // Get the element type for a vector
> -    uint32_t elemNum;
> -    const ir::Type type = getVectorInfo(ctx, I.getType(), &I, elemNum);
> +    const ir::Type type = getType(ctx, I.getType());
>  
>      // Emit the instructions in a row
> -    for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> -      const ir::Register dst = this->getRegister(&I, elemID);
> -      const ir::Register src0 = this->getRegister(I.getOperand(0), elemID);
> -      const ir::Register src1 = this->getRegister(I.getOperand(1), elemID);
> -
> -      switch (I.getOpcode()) {
> -        case Instruction::Add:
> -        case Instruction::FAdd: ctx.ADD(type, dst, src0, src1); break;
> -        case Instruction::Sub:
> -        case Instruction::FSub: ctx.SUB(type, dst, src0, src1); break;
> -        case Instruction::Mul:
> -        case Instruction::FMul: ctx.MUL(type, dst, src0, src1); break;
> -        case Instruction::URem:
> -        case Instruction::SRem:
> -        case Instruction::FRem: ctx.REM(type, dst, src0, src1); break;
> -        case Instruction::UDiv:
> -        case Instruction::SDiv:
> -        case Instruction::FDiv: ctx.DIV(type, dst, src0, src1); break;
> -        case Instruction::And:  ctx.AND(type, dst, src0, src1); break;
> -        case Instruction::Or:   ctx.OR(type, dst, src0, src1); break;
> -        case Instruction::Xor:  ctx.XOR(type, dst, src0, src1); break;
> -        case Instruction::Shl:  ctx.SHL(type, dst, src0, src1); break;
> -        case Instruction::LShr: ctx.SHR(getUnsignedType(ctx, I.getType()), dst, src0, src1); break;
> -        case Instruction::AShr: ctx.ASR(type, dst, src0, src1); break;
> -        default: NOT_SUPPORTED;
> -      }
> +    const ir::Register dst = this->getRegister(&I);
> +    const ir::Register src0 = this->getRegister(I.getOperand(0));
> +    const ir::Register src1 = this->getRegister(I.getOperand(1));
> +
> +    switch (I.getOpcode()) {
> +      case Instruction::Add:
> +      case Instruction::FAdd: ctx.ADD(type, dst, src0, src1); break;
> +      case Instruction::Sub:
> +      case Instruction::FSub: ctx.SUB(type, dst, src0, src1); break;
> +      case Instruction::Mul:
> +      case Instruction::FMul: ctx.MUL(type, dst, src0, src1); break;
> +      case Instruction::URem:
> +      case Instruction::SRem:
> +      case Instruction::FRem: ctx.REM(type, dst, src0, src1); break;
> +      case Instruction::UDiv:
> +      case Instruction::SDiv:
> +      case Instruction::FDiv: ctx.DIV(type, dst, src0, src1); break;
> +      case Instruction::And:  ctx.AND(type, dst, src0, src1); break;
> +      case Instruction::Or:   ctx.OR(type, dst, src0, src1); break;
> +      case Instruction::Xor:  ctx.XOR(type, dst, src0, src1); break;
> +      case Instruction::Shl:  ctx.SHL(type, dst, src0, src1); break;
> +      case Instruction::LShr: ctx.SHR(getUnsignedType(ctx, I.getType()), dst, src0, src1); break;
> +      case Instruction::AShr: ctx.ASR(type, dst, src0, src1); break;
> +      default: NOT_SUPPORTED;
>      }
>    }
>  
> @@ -1294,49 +1264,46 @@ namespace gbe
>      GBE_ASSERT(I.getOperand(0)->getType() != Type::getInt1Ty(I.getContext()));
>  
>      // Get the element type and the number of elements
> -    uint32_t elemNum;
>      Type *operandType = I.getOperand(0)->getType();
> -    const ir::Type type = getVectorInfo(ctx, operandType, &I, elemNum);
> +    const ir::Type type = getType(ctx, operandType);
>      const ir::Type signedType = makeTypeSigned(type);
>      const ir::Type unsignedType = makeTypeUnsigned(type);
>  
>      // Emit the instructions in a row
> -    for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> -      const ir::Register dst = this->getRegister(&I, elemID);
> -      const ir::Register src0 = this->getRegister(I.getOperand(0), elemID);
> -      const ir::Register src1 = this->getRegister(I.getOperand(1), elemID);
> -
> -      // We must invert the condition to simplify the branch code
> -      if (conditionSet.find(&I) != conditionSet.end()) {
> -        switch (I.getPredicate()) {
> -          case ICmpInst::ICMP_EQ:  ctx.NE(type, dst, src0, src1); break;
> -          case ICmpInst::ICMP_NE:  ctx.EQ(type, dst, src0, src1); break;
> -          case ICmpInst::ICMP_ULE: ctx.GT((unsignedType), dst, src0, src1); break;
> -          case ICmpInst::ICMP_SLE: ctx.GT(signedType, dst, src0, src1); break;
> -          case ICmpInst::ICMP_UGE: ctx.LT(unsignedType, dst, src0, src1); break;
> -          case ICmpInst::ICMP_SGE: ctx.LT(signedType, dst, src0, src1); break;
> -          case ICmpInst::ICMP_ULT: ctx.GE(unsignedType, dst, src0, src1); break;
> -          case ICmpInst::ICMP_SLT: ctx.GE(signedType, dst, src0, src1); break;
> -          case ICmpInst::ICMP_UGT: ctx.LE(unsignedType, dst, src0, src1); break;
> -          case ICmpInst::ICMP_SGT: ctx.LE(signedType, dst, src0, src1); break;
> -          default: NOT_SUPPORTED;
> -        }
> +    const ir::Register dst = this->getRegister(&I);
> +    const ir::Register src0 = this->getRegister(I.getOperand(0));
> +    const ir::Register src1 = this->getRegister(I.getOperand(1));
> +
> +    // We must invert the condition to simplify the branch code
> +    if (conditionSet.find(&I) != conditionSet.end()) {
> +      switch (I.getPredicate()) {
> +        case ICmpInst::ICMP_EQ:  ctx.NE(type, dst, src0, src1); break;
> +        case ICmpInst::ICMP_NE:  ctx.EQ(type, dst, src0, src1); break;
> +        case ICmpInst::ICMP_ULE: ctx.GT((unsignedType), dst, src0, src1); break;
> +        case ICmpInst::ICMP_SLE: ctx.GT(signedType, dst, src0, src1); break;
> +        case ICmpInst::ICMP_UGE: ctx.LT(unsignedType, dst, src0, src1); break;
> +        case ICmpInst::ICMP_SGE: ctx.LT(signedType, dst, src0, src1); break;
> +        case ICmpInst::ICMP_ULT: ctx.GE(unsignedType, dst, src0, src1); break;
> +        case ICmpInst::ICMP_SLT: ctx.GE(signedType, dst, src0, src1); break;
> +        case ICmpInst::ICMP_UGT: ctx.LE(unsignedType, dst, src0, src1); break;
> +        case ICmpInst::ICMP_SGT: ctx.LE(signedType, dst, src0, src1); break;
> +        default: NOT_SUPPORTED;
>        }
> -      // Nothing special to do
> -      else {
> -        switch (I.getPredicate()) {
> -          case ICmpInst::ICMP_EQ:  ctx.EQ(type, dst, src0, src1); break;
> -          case ICmpInst::ICMP_NE:  ctx.NE(type, dst, src0, src1); break;
> -          case ICmpInst::ICMP_ULE: ctx.LE((unsignedType), dst, src0, src1); break;
> -          case ICmpInst::ICMP_SLE: ctx.LE(signedType, dst, src0, src1); break;
> -          case ICmpInst::ICMP_UGE: ctx.GE(unsignedType, dst, src0, src1); break;
> -          case ICmpInst::ICMP_SGE: ctx.GE(signedType, dst, src0, src1); break;
> -          case ICmpInst::ICMP_ULT: ctx.LT(unsignedType, dst, src0, src1); break;
> -          case ICmpInst::ICMP_SLT: ctx.LT(signedType, dst, src0, src1); break;
> -          case ICmpInst::ICMP_UGT: ctx.GT(unsignedType, dst, src0, src1); break;
> -          case ICmpInst::ICMP_SGT: ctx.GT(signedType, dst, src0, src1); break;
> -          default: NOT_SUPPORTED;
> -        }
> +    }
> +    // Nothing special to do
> +    else {
> +      switch (I.getPredicate()) {
> +        case ICmpInst::ICMP_EQ:  ctx.EQ(type, dst, src0, src1); break;
> +        case ICmpInst::ICMP_NE:  ctx.NE(type, dst, src0, src1); break;
> +        case ICmpInst::ICMP_ULE: ctx.LE((unsignedType), dst, src0, src1); break;
> +        case ICmpInst::ICMP_SLE: ctx.LE(signedType, dst, src0, src1); break;
> +        case ICmpInst::ICMP_UGE: ctx.GE(unsignedType, dst, src0, src1); break;
> +        case ICmpInst::ICMP_SGE: ctx.GE(signedType, dst, src0, src1); break;
> +        case ICmpInst::ICMP_ULT: ctx.LT(unsignedType, dst, src0, src1); break;
> +        case ICmpInst::ICMP_SLT: ctx.LT(signedType, dst, src0, src1); break;
> +        case ICmpInst::ICMP_UGT: ctx.GT(unsignedType, dst, src0, src1); break;
> +        case ICmpInst::ICMP_SGT: ctx.GT(signedType, dst, src0, src1); break;
> +        default: NOT_SUPPORTED;
>        }
>      }
>    }
> @@ -1348,31 +1315,28 @@ namespace gbe
>    void GenWriter::emitFCmpInst(FCmpInst &I) {
>  
>      // Get the element type and the number of elements
> -    uint32_t elemNum;
>      Type *operandType = I.getOperand(0)->getType();
> -    const ir::Type type = getVectorInfo(ctx, operandType, &I, elemNum);
> +    const ir::Type type = getType(ctx, operandType);
>  
>      // Emit the instructions in a row
> -    for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> -      const ir::Register dst = this->getRegister(&I, elemID);
> -      const ir::Register src0 = this->getRegister(I.getOperand(0), elemID);
> -      const ir::Register src1 = this->getRegister(I.getOperand(1), elemID);
> -
> -      switch (I.getPredicate()) {
> -        case ICmpInst::FCMP_OEQ:
> -        case ICmpInst::FCMP_UEQ: ctx.EQ(type, dst, src0, src1); break;
> -        case ICmpInst::FCMP_ONE:
> -        case ICmpInst::FCMP_UNE: ctx.NE(type, dst, src0, src1); break;
> -        case ICmpInst::FCMP_OLE:
> -        case ICmpInst::FCMP_ULE: ctx.LE(type, dst, src0, src1); break;
> -        case ICmpInst::FCMP_OGE:
> -        case ICmpInst::FCMP_UGE: ctx.GE(type, dst, src0, src1); break;
> -        case ICmpInst::FCMP_OLT:
> -        case ICmpInst::FCMP_ULT: ctx.LT(type, dst, src0, src1); break;
> -        case ICmpInst::FCMP_OGT:
> -        case ICmpInst::FCMP_UGT: ctx.GT(type, dst, src0, src1); break;
> -        default: NOT_SUPPORTED;
> -      }
> +    const ir::Register dst = this->getRegister(&I);
> +    const ir::Register src0 = this->getRegister(I.getOperand(0));
> +    const ir::Register src1 = this->getRegister(I.getOperand(1));
> +
> +    switch (I.getPredicate()) {
> +      case ICmpInst::FCMP_OEQ:
> +      case ICmpInst::FCMP_UEQ: ctx.EQ(type, dst, src0, src1); break;
> +      case ICmpInst::FCMP_ONE:
> +      case ICmpInst::FCMP_UNE: ctx.NE(type, dst, src0, src1); break;
> +      case ICmpInst::FCMP_OLE:
> +      case ICmpInst::FCMP_ULE: ctx.LE(type, dst, src0, src1); break;
> +      case ICmpInst::FCMP_OGE:
> +      case ICmpInst::FCMP_UGE: ctx.GE(type, dst, src0, src1); break;
> +      case ICmpInst::FCMP_OLT:
> +      case ICmpInst::FCMP_ULT: ctx.LT(type, dst, src0, src1); break;
> +      case ICmpInst::FCMP_OGT:
> +      case ICmpInst::FCMP_UGT: ctx.GT(type, dst, src0, src1); break;
> +      default: NOT_SUPPORTED;
>      }
>    }
>  
> @@ -1402,10 +1366,7 @@ namespace gbe
>        // Bitcast just forward registers
>        case Instruction::BitCast:
>        {
> -        uint32_t elemNum;
> -        getVectorInfo(ctx, I.getType(), &I, elemNum);
> -        for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
> -          regTranslator.newValueProxy(srcValue, dstValue, elemID, elemID);
> +        regTranslator.newValueProxy(srcValue, dstValue);
>        }
>        break;
>        // Various conversion operations -> just allocate registers for them
> @@ -1453,15 +1414,14 @@ namespace gbe
>        case Instruction::Trunc:
>        {
>          // Get the element type for a vector
> -        uint32_t elemNum;
>          Type *llvmDstType = I.getType();
>          Type *llvmSrcType = I.getOperand(0)->getType();
> -        const ir::Type dstType = getVectorInfo(ctx, llvmDstType, &I, elemNum);
> +        const ir::Type dstType = getType(ctx, llvmDstType);
>          ir::Type srcType;
>          if (I.getOpcode() == Instruction::ZExt) {
> -          srcType = getVectorInfo(ctx, llvmSrcType, &I, elemNum, true);
> +          srcType = getUnsignedType(ctx, llvmSrcType);
>          } else {
> -          srcType = getVectorInfo(ctx, llvmSrcType, &I, elemNum);
> +          srcType = getType(ctx, llvmSrcType);
>          }
>  
>          // We use a select (0,1) not a convert when the destination is a boolean
> @@ -1473,19 +1433,15 @@ namespace gbe
>            const ir::Register oneReg = ctx.reg(family);
>            ctx.LOADI(dstType, zeroReg, zero);
>            ctx.LOADI(dstType, oneReg, one);
> -          for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> -            const ir::Register dst = this->getRegister(&I, elemID);
> -            const ir::Register src = this->getRegister(I.getOperand(0), elemID);
> -            ctx.SEL(dstType, dst, src, oneReg, zeroReg);
> -          }
> +          const ir::Register dst = this->getRegister(&I);
> +          const ir::Register src = this->getRegister(I.getOperand(0));
> +          ctx.SEL(dstType, dst, src, oneReg, zeroReg);
>          }
>          // Use a convert for the other cases
>          else {
> -          for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> -            const ir::Register dst = this->getRegister(&I, elemID);
> -            const ir::Register src = this->getRegister(I.getOperand(0), elemID);
> -            ctx.CVT(dstType, srcType, dst, src);
> -          }
> +          const ir::Register dst = this->getRegister(&I);
> +          const ir::Register src = this->getRegister(I.getOperand(0));
> +          ctx.CVT(dstType, srcType, dst, src);
>          }
>        }
>        break;
> @@ -1519,22 +1475,14 @@ namespace gbe
>  
>    void GenWriter::emitSelectInst(SelectInst &I) {
>      // Get the element type for a vector
> -    uint32_t elemNum;
> -    const ir::Type type = getVectorInfo(ctx, I.getType(), &I, elemNum);
> -
> -    // Condition can be either a vector or a scalar
> -    Type *condType = I.getOperand(0)->getType();
> -    const bool isVectorCond = isa<VectorType>(condType);
> +    const ir::Type type = getType(ctx, I.getType());
>  
>      // Emit the instructions in a row
> -    for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> -      const ir::Register dst = this->getRegister(&I, elemID);
> -      const uint32_t condID = isVectorCond ? elemID : 0;
> -      const ir::Register cond = this->getRegister(I.getOperand(0), condID);
> -      const ir::Register src0 = this->getRegister(I.getOperand(1), elemID);
> -      const ir::Register src1 = this->getRegister(I.getOperand(2), elemID);
> -      ctx.SEL(type, dst, cond, src0, src1);
> -    }
> +    const ir::Register dst = this->getRegister(&I);
> +    const ir::Register cond = this->getRegister(I.getOperand(0));
> +    const ir::Register src0 = this->getRegister(I.getOperand(1));
> +    const ir::Register src1 = this->getRegister(I.getOperand(2));
> +    ctx.SEL(type, dst, cond, src0, src1);
>    }
>  
>    void GenWriter::regAllocatePHINode(PHINode &I) {
> @@ -1547,15 +1495,11 @@ namespace gbe
>  
>    void GenWriter::emitPHINode(PHINode &I) {
>      Value *copy = this->getPHICopy(&I);
> -    uint32_t elemNum;
> -    const ir::Type type = getVectorInfo(ctx, I.getType(), &I, elemNum);
> -
> -    // Emit the MOVs to avoid the lost copy issue
> -    for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> -      const ir::Register dst = this->getRegister(&I, elemID);
> -      const ir::Register src = this->getRegister(copy, elemID);
> -      ctx.MOV(type, dst, src);
> -    }
> +    const ir::Type type = getType(ctx, I.getType());
> +
> +    const ir::Register dst = this->getRegister(&I);
> +    const ir::Register src = this->getRegister(copy);
> +    ctx.MOV(type, dst, src);
>    }
>  
>    void GenWriter::regAllocateBranchInst(BranchInst &I) {}
> @@ -1728,9 +1672,10 @@ namespace gbe
>        case GEN_OCL_READ_IMAGE15:
>        {
>        // dst is a 4 elements vector. We allocate all 4 registers here.
> -        uint32_t elemNum;
> -        (void)getVectorInfo(ctx, I.getType(), &I, elemNum);
> -        GBE_ASSERT(elemNum == 4);
> +        Type* ty = I.getType();
> +        GBE_ASSERT(ty->isVectorTy());
> +        uint32_t elemNum = (cast<VectorType>(ty))->getNumElements();
> +        GBE_ASSERT(elemNum==4);
>          this->newRegister(&I);
>          break;
>        }
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet