[Mesa-dev] [PATCH] radeon/llvm: improve select_cc lowering to generate CND* more often

Wed Sep 26 08:56:58 PDT 2012

On Tue, Sep 25, 2012 at 11:45:37PM +0200, Vincent Lejeune wrote:
> ---
>  src/gallium/drivers/radeon/R600ISelLowering.cpp    | 90 +++++++++++++---------
>  src/gallium/drivers/radeon/R600ISelLowering.h      |  3 +
>  src/gallium/drivers/radeon/R600Instructions.td     | 38 +++++++--
>  .../drivers/radeon/radeon_setup_tgsi_llvm.c        | 17 +++-
>  4 files changed, 104 insertions(+), 44 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp
> index 379bd48..d52762b 100644
> --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
> +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
> @@ -507,6 +507,17 @@ SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
>                                   Op.getOperand(1)));
>  }
>  
> +bool R600TargetLowering::isZero(SDValue Op) const
> +{
> +  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
> +    return !(Cst->getZExtValue());
> +  } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
> +    return CstFP->isExactlyValue(0.0f);
> +  } else {
> +    return false;
> +  }
> +}
> +

You can simplify this function by using ConstantSDNode::isNullValue().

>  SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
>  {
>    DebugLoc DL = Op.getDebugLoc();
> @@ -517,7 +528,6 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
>    SDValue True = Op.getOperand(2);
>    SDValue False = Op.getOperand(3);
>    SDValue CC = Op.getOperand(4);
> -  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
>    SDValue Temp;
>  
>    // LHS and RHS are guaranteed to be the same value type
> @@ -560,47 +570,58 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
>    if (isHWTrueValue(False) && isHWFalseValue(True)) {
>    }
>  
> -  // XXX Check if we can lower this to a SELECT or if it is supported by a native
> -  // operation. (The code below does this but we don't have the Instruction
> -  // selection patterns to do this yet.
> -#if 0
> +  // Check if we can lower this to a native operation.
> +  // CND* instructions requires all operands to have the same type, 
> +  // and RHS to be zero.
> +
>    if (isZero(LHS) || isZero(RHS)) {
>      SDValue Cond = (isZero(LHS) ? RHS : LHS);
> -    bool SwapTF = false;
> +    SDValue Zero = (isZero(LHS) ? LHS : RHS);
> +    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
> +    if (CompareVT != VT) {
> +      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
> +      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
> +    }
> +    if (isZero(LHS)) {
> +      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
> +    }
> +
>      switch (CCOpcode) {
> -    case ISD::SETOEQ:
 -    case ISD::SETUEQ:
> -    case ISD::SETEQ:
> -      SwapTF = true;
> -      // Fall through
>      case ISD::SETONE:
>      case ISD::SETUNE:
>      case ISD::SETNE:
> -      // We can lower to select
> -      if (SwapTF) {
> -        Temp = True;
> -        True = False;
> -        False = Temp;
> -      }
> -      // CNDE
> -      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
> +    case ISD::SETULE:
> +    case ISD::SETULT:
> +    case ISD::SETOLE:
> +    case ISD::SETOLT:
> +    case ISD::SETLE:
> +    case ISD::SETLT:
> +      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
> +      Temp = True;
> +      True = False;
> +      False = Temp;
> +      break;
>      default:
> -      // Supported by a native operation (CNDGE, CNDGT)
> -      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
> +      break;
>      }
> +    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
> +        Cond, Zero, 
> +        True, False, 
> +        DAG.getCondCode(CCOpcode));
> +    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
>    }
> -#endif
> +
>  
>    // If we make it this for it means we have no native instructions to handle
>    // this SELECT_CC, so we must lower it.
>    SDValue HWTrue, HWFalse;
>  
> -  if (VT == MVT::f32) {
> -    HWTrue = DAG.getConstantFP(1.0f, VT);
> -    HWFalse = DAG.getConstantFP(0.0f, VT);
> -  } else if (VT == MVT::i32) {
> -    HWTrue = DAG.getConstant(-1, VT);
> -    HWFalse = DAG.getConstant(0, VT);
> +  if (CompareVT == MVT::f32) {
> +    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
> +    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
> +  } else if (CompareVT == MVT::i32) {
> +    HWTrue = DAG.getConstant(-1, CompareVT);
> +    HWFalse = DAG.getConstant(0, CompareVT);
>    }
>    else {
>      assert(!"Unhandled value type in LowerSELECT_CC");
> @@ -608,15 +629,12 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
>  
>    // Lower this unsupported SELECT_CC into a combination of two supported
>    // SELECT_CC operations.
> -  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);
> -
> -  // Convert floating point condition to i1
> -  if (VT == MVT::f32) {
> -    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
> -                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
> -  }
> +  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
>  
> -  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
> +  return DAG.getNode(ISD::SELECT_CC, DL, VT, 
> +      Cond, HWFalse, 
> +      True, False,
> +      DAG.getCondCode(ISD::SETNE));
>  }
>  
>  SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
> diff --git a/src/gallium/drivers/radeon/R600ISelLowering.h b/src/gallium/drivers/radeon/R600ISelLowering.h
> index 7c81f5f..0117ca6 100644
> --- a/src/gallium/drivers/radeon/R600ISelLowering.h
> +++ b/src/gallium/drivers/radeon/R600ISelLowering.h
> @@ -52,6 +52,9 @@ private:
>    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
>    SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const;
>    SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
> +  
> +  bool isZero(SDValue Op) const;
> +  SDValue reverseCondOp(unsigned CCOpcode) const;

This function does not appear to be implemented.

>  };
>  
>  } // End namespace llvm;
> diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
> index 92a3e38..ba52721 100644
> --- a/src/gallium/drivers/radeon/R600Instructions.td
> +++ b/src/gallium/drivers/radeon/R600Instructions.td
> @@ -500,7 +500,25 @@ def SETGE_UINT : R600_2OP <
>  def CNDE_INT : R600_3OP <
>  	0x1C, "CNDE_INT",
>    [(set (i32 R600_Reg32:$dst),
> -   (select R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
> +   (selectcc (i32 R600_Reg32:$src0), 0,
> +       (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), 
> +       COND_EQ))]
> +>;
> +
> +def CNDGE_INT : R600_3OP <
> +	0x1E, "CNDGE_INT",
> +  [(set (i32 R600_Reg32:$dst),
> +   (selectcc (i32 R600_Reg32:$src0), 0,
> +       (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), 
> +       COND_GE))]
> +>;
> +
> +def CNDGT_INT : R600_3OP <
> +	0x1D, "CNDGT_INT",
> +  [(set (i32 R600_Reg32:$dst),
> +   (selectcc (i32 R600_Reg32:$src0), 0,
> +       (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), 
> +       COND_GT))]
>  >;
>  
>  //===----------------------------------------------------------------------===//
> @@ -597,18 +615,26 @@ class MULADD_Common <bits<32> inst> : R600_3OP <
>  
>  class CNDE_Common <bits<32> inst> : R600_3OP <
>    inst, "CNDE",
> -  [(set (f32 R600_Reg32:$dst),
> -   (select (i32 (fp_to_sint (fneg R600_Reg32:$src0))), (f32 R600_Reg32:$src2), (f32 R600_Reg32:$src1)))]
> +  [(set R600_Reg32:$dst,
> +   (selectcc (f32 R600_Reg32:$src0), FP_ZERO, 
> +       (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
> +       COND_EQ))]
>  >;
>  
>  class CNDGT_Common <bits<32> inst> : R600_3OP <
>    inst, "CNDGT",
> -  []
> +  [(set R600_Reg32:$dst,
> +   (selectcc (f32 R600_Reg32:$src0), FP_ZERO, 
> +       (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
> +       COND_GT))]
>  >;
> -  
> +
>  class CNDGE_Common <bits<32> inst> : R600_3OP <
>    inst, "CNDGE",
> -  [(set R600_Reg32:$dst, (int_AMDGPU_cndlt R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
> +  [(set R600_Reg32:$dst,
> +   (selectcc (f32 R600_Reg32:$src0), FP_ZERO, 
> +       (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
> +       COND_GE))]
>  >;
>  
>  class DOT4_Common <bits<32> inst> : R600_REDUCTION <
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index 04469e2..bb37154 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -934,6 +934,20 @@ static void emit_u2f(
>  			emit_data->args[0], bld_base->base.elem_type, "");
>  }
>  
> +static void emit_cndlt(
> +		const struct lp_build_tgsi_action * action,
> +		struct lp_build_tgsi_context * bld_base,
> +		struct lp_build_emit_data * emit_data)
> +{
> +	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> +	LLVMValueRef float_zero = lp_build_const_float(
> +		bld_base->base.gallivm, 0.0f);
> +	LLVMValueRef cmp = LLVMBuildFCmp(
> +		builder, LLVMRealULT, emit_data->args[0], float_zero, "");
> +	emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
> +		cmp, emit_data->args[1], emit_data->args[2], "");
> +}
> +
>  static void emit_immediate(struct lp_build_tgsi_context * bld_base,
>  		const struct tgsi_full_immediate *imm)
>  {
> @@ -1115,8 +1129,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
>  	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
>  	bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
>  	bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp.";
> -	bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem;
> -	bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt";
> +	bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cndlt;
>  	bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
>  	bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.AMDGPU.cos";
>  	bld_base->op_actions[TGSI_OPCODE_DIV].emit = build_tgsi_intrinsic_nomem;

It would be nice if you could also remove the llvm.AMDGPU.cndlt
intrinsic and also delete the SI pseudo instruction that uses it.  I can
test on SI for you.

-Tom

> -- 
> 1.7.11.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev