[Mesa-dev] [PATCH] radeon/llvm: improve select_cc lowering to generate CND* more often
Tom Stellard
tom at stellard.net
Wed Sep 26 12:50:02 PDT 2012
On Wed, Sep 26, 2012 at 09:33:15PM +0200, Vincent Lejeune wrote:
> v2: - Simplify isZero()
> - Remove an unused function prototype
> - Clean up trailing whitespace
> ---
> src/gallium/drivers/r600/r600_llvm.c | 15 +++++
> src/gallium/drivers/radeon/R600ISelLowering.cpp | 89 +++++++++++++++----------
> src/gallium/drivers/radeon/R600ISelLowering.h | 2 +
> src/gallium/drivers/radeon/R600Instructions.td | 38 +++++++++--
> 4 files changed, 103 insertions(+), 41 deletions(-)
>
This version should be safe for radeonsi.
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
> index bc2f46e..e935ee2 100644
> --- a/src/gallium/drivers/r600/r600_llvm.c
> +++ b/src/gallium/drivers/r600/r600_llvm.c
> @@ -201,6 +201,20 @@ static void llvm_emit_tex(
> emit_data->dst_type, args, c, LLVMReadNoneAttribute);
> }
>
> +static void emit_cndlt(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + LLVMValueRef float_zero = lp_build_const_float(
> + bld_base->base.gallivm, 0.0f);
> + LLVMValueRef cmp = LLVMBuildFCmp(
> + builder, LLVMRealULT, emit_data->args[0], float_zero, "");
> + emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
> + cmp, emit_data->args[1], emit_data->args[2], "");
> +}
> +
> static void dp_fetch_args(
> struct lp_build_tgsi_context * bld_base,
> struct lp_build_emit_data * emit_data)
> @@ -277,6 +291,7 @@ LLVMModuleRef r600_tgsi_llvm(
> bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex;
> bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
> bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex;
> + bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cndlt;
>
> lp_build_tgsi_llvm(bld_base, tokens);
>
> diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp
> index 2fc9c67..5dd2f53 100644
> --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
> +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
> @@ -516,6 +516,17 @@ SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
> Op.getOperand(1)));
> }
>
> +bool R600TargetLowering::isZero(SDValue Op) const
> +{
> + if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
> + return Cst->isNullValue();
> + } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
> + return CstFP->isZero();
> + } else {
> + return false;
> + }
> +}
> +
> SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
> {
> DebugLoc DL = Op.getDebugLoc();
> @@ -568,47 +579,58 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
> if (isHWTrueValue(False) && isHWFalseValue(True)) {
> }
>
> - // XXX Check if we can lower this to a SELECT or if it is supported by a native
> - // operation. (The code below does this but we don't have the Instruction
> - // selection patterns to do this yet.
> -#if 0
> + // Check if we can lower this to a native operation.
> + // CND* instructions require all operands to have the same type,
> + // and RHS to be zero.
> +
> if (isZero(LHS) || isZero(RHS)) {
> SDValue Cond = (isZero(LHS) ? RHS : LHS);
> - bool SwapTF = false;
> + SDValue Zero = (isZero(LHS) ? LHS : RHS);
> + ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
> + if (CompareVT != VT) {
> + True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
> + False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
> + }
> + if (isZero(LHS)) {
> + CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
> + }
> +
> switch (CCOpcode) {
> - case ISD::SETOEQ:
> - case ISD::SETUEQ:
> - case ISD::SETEQ:
> - SwapTF = true;
> - // Fall through
> case ISD::SETONE:
> case ISD::SETUNE:
> case ISD::SETNE:
> - // We can lower to select
> - if (SwapTF) {
> - Temp = True;
> - True = False;
> - False = Temp;
> - }
> - // CNDE
> - return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
> + case ISD::SETULE:
> + case ISD::SETULT:
> + case ISD::SETOLE:
> + case ISD::SETOLT:
> + case ISD::SETLE:
> + case ISD::SETLT:
> + CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
> + Temp = True;
> + True = False;
> + False = Temp;
> + break;
> default:
> - // Supported by a native operation (CNDGE, CNDGT)
> - return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
> + break;
> }
> + SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
> + Cond, Zero,
> + True, False,
> + DAG.getCondCode(CCOpcode));
> + return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
> }
> -#endif
> +
>
> // If we make it this for it means we have no native instructions to handle
> // this SELECT_CC, so we must lower it.
> SDValue HWTrue, HWFalse;
>
> - if (VT == MVT::f32) {
> - HWTrue = DAG.getConstantFP(1.0f, VT);
> - HWFalse = DAG.getConstantFP(0.0f, VT);
> - } else if (VT == MVT::i32) {
> - HWTrue = DAG.getConstant(-1, VT);
> - HWFalse = DAG.getConstant(0, VT);
> + if (CompareVT == MVT::f32) {
> + HWTrue = DAG.getConstantFP(1.0f, CompareVT);
> + HWFalse = DAG.getConstantFP(0.0f, CompareVT);
> + } else if (CompareVT == MVT::i32) {
> + HWTrue = DAG.getConstant(-1, CompareVT);
> + HWFalse = DAG.getConstant(0, CompareVT);
> }
> else {
> assert(!"Unhandled value type in LowerSELECT_CC");
> @@ -616,15 +638,12 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
>
> // Lower this unsupported SELECT_CC into a combination of two supported
> // SELECT_CC operations.
> - SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);
> -
> - // Convert floating point condition to i1
> - if (VT == MVT::f32) {
> - Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
> - DAG.getNode(ISD::FNEG, DL, VT, Cond));
> - }
> + SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
>
> - return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
> + return DAG.getNode(ISD::SELECT_CC, DL, VT,
> + Cond, HWFalse,
> + True, False,
> + DAG.getCondCode(ISD::SETNE));
> }
>
> SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
> diff --git a/src/gallium/drivers/radeon/R600ISelLowering.h b/src/gallium/drivers/radeon/R600ISelLowering.h
> index 7b9c27e..7df2dd1 100644
> --- a/src/gallium/drivers/radeon/R600ISelLowering.h
> +++ b/src/gallium/drivers/radeon/R600ISelLowering.h
> @@ -60,6 +60,8 @@ private:
> SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const;
> SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
> +
> + bool isZero(SDValue Op) const;
> };
>
> } // End namespace llvm;
> diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
> index feb97fa..1689a2f 100644
> --- a/src/gallium/drivers/radeon/R600Instructions.td
> +++ b/src/gallium/drivers/radeon/R600Instructions.td
> @@ -545,7 +545,25 @@ def SETGE_UINT : R600_2OP <
> def CNDE_INT : R600_3OP <
> 0x1C, "CNDE_INT",
> [(set (i32 R600_Reg32:$dst),
> - (select R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
> + (selectcc (i32 R600_Reg32:$src0), 0,
> + (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
> + COND_EQ))]
> +>;
> +
> +def CNDGE_INT : R600_3OP <
> + 0x1E, "CNDGE_INT",
> + [(set (i32 R600_Reg32:$dst),
> + (selectcc (i32 R600_Reg32:$src0), 0,
> + (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
> + COND_GE))]
> +>;
> +
> +def CNDGT_INT : R600_3OP <
> + 0x1D, "CNDGT_INT",
> + [(set (i32 R600_Reg32:$dst),
> + (selectcc (i32 R600_Reg32:$src0), 0,
> + (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
> + COND_GT))]
> >;
>
> //===----------------------------------------------------------------------===//
> @@ -642,18 +660,26 @@ class MULADD_Common <bits<11> inst> : R600_3OP <
>
> class CNDE_Common <bits<11> inst> : R600_3OP <
> inst, "CNDE",
> - [(set (f32 R600_Reg32:$dst),
> - (select (i32 (fp_to_sint (fneg R600_Reg32:$src0))), (f32 R600_Reg32:$src2), (f32 R600_Reg32:$src1)))]
> + [(set R600_Reg32:$dst,
> + (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
> + (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
> + COND_EQ))]
> >;
>
> class CNDGT_Common <bits<11> inst> : R600_3OP <
> inst, "CNDGT",
> - []
> + [(set R600_Reg32:$dst,
> + (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
> + (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
> + COND_GT))]
> >;
> -
> +
> class CNDGE_Common <bits<11> inst> : R600_3OP <
> inst, "CNDGE",
> - [(set R600_Reg32:$dst, (int_AMDGPU_cndlt R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
> + [(set R600_Reg32:$dst,
> + (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
> + (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
> + COND_GE))]
> >;
>
> class DOT4_Common <bits<11> inst> : R600_REDUCTION <
> --
> 1.7.11.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list