[Mesa-dev] [PATCH 2/2] radeon/llvm: add a pattern for min/max

Tue Dec 4 17:24:52 PST 2012

On Wed, Dec 05, 2012 at 12:09:41AM +0100, Vincent Lejeune wrote:
> ---
>  lib/Target/AMDGPU/R600ISelLowering.cpp | 47 ++++++++++++++++++++++++++++++++
>  lib/Target/AMDGPU/R600Instructions.td  | 12 +++++++--
>  lib/Target/AMDGPU/SIISelLowering.cpp   | 49 ++++++++++++++++++++++++++++++++++
>  lib/Target/AMDGPU/SIInstructions.td    | 10 +++++--
>  4 files changed, 114 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
> index 6f1c1d7..1103ff4 100644
> --- a/lib/Target/AMDGPU/R600ISelLowering.cpp
> +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
> @@ -764,6 +764,53 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
>      }
>    }
>  
> +  // Standardize Min/Max pattern if applicable

It should be easy to consolidate the R600 and SI copies of this code:
just add a function to the AMDGPUISelLowering class to handle Max/Min
and call it from each target's LowerSELECT_CC. The custom AMDGPU DAG
nodes FMIN and FMAX already exist, so you can lower the select_cc to
those. Then you won't need to change the tablegen files at all.

-Tom
> +  if (CompareVT == VT == MVT::f32 &&
> +      ((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
> +    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
> +    switch (CCOpcode) {
> +    case ISD::SETOEQ:
> +    case ISD::SETONE:
> +    case ISD::SETUNE:
> +    case ISD::SETNE:
> +    case ISD::SETUEQ:
> +    case ISD::SETEQ:
> +    case ISD::SETFALSE:
> +    case ISD::SETFALSE2:
> +    case ISD::SETTRUE:
> +    case ISD::SETTRUE2:
> +    case ISD::SETUO:
> +    case ISD::SETO:
> +      assert(0 && "Operation should already be optimised !");
> +    case ISD::SETULE:
> +    case ISD::SETULT:
> +    case ISD::SETOLE:
> +    case ISD::SETOLT:
> +    case ISD::SETLE:
> +    case ISD::SETLT:
> +      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
> +      Temp = True;
> +      True = False;
> +      False = Temp;
> +      break;
> +    case ISD::SETGT:
> +    case ISD::SETGE:
> +    case ISD::SETUGE:
> +    case ISD::SETOGE:
> +    case ISD::SETUGT:
> +    case ISD::SETOGT:
> +      CCOpcode = ISD::SETUGE;
> +      break;
> +    case ISD::SETCC_INVALID:
> +      assert(0 && "Invalid setcc condcode !");
> +    }
> +    return DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
> +        LHS, RHS,
> +        True, False,
> +        DAG.getCondCode(CCOpcode));
> +    return Op;
> +  }
> +
>    // If we make it this for it means we have no native instructions to handle
>    // this SELECT_CC, so we must lower it.
>    SDValue HWTrue, HWFalse;
> diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
> index d75c32e..43a7552 100644
> --- a/lib/Target/AMDGPU/R600Instructions.td
> +++ b/lib/Target/AMDGPU/R600Instructions.td
> @@ -555,8 +555,16 @@ def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
>  // Non-IEEE MUL: 0 * anything = 0
>  def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", fmul>;
>  def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", int_AMDGPU_mul>;
> -def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
> -def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
> +def MAX : R600_2OP <0x3, "MAX",
> +  [(set R600_Reg32:$dst,
> +  (selectcc (f32 R600_Reg32:$src0), (f32 R600_Reg32:$src1),
> +      (f32 R600_Reg32:$src0), (f32 R600_Reg32:$src1),
> +      COND_GE))]>;
> +def MIN : R600_2OP <0x4, "MIN",
> +  [(set R600_Reg32:$dst,
> +  (selectcc (f32 R600_Reg32:$src0), (f32 R600_Reg32:$src1),
> +      (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src0),
> +      COND_GE))]>;
>  
>  // For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
>  // so some of the instruction names don't match the asm string.
> diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
> index 45f180f..638c9df 100644
> --- a/lib/Target/AMDGPU/SIISelLowering.cpp
> +++ b/lib/Target/AMDGPU/SIISelLowering.cpp
> @@ -383,6 +383,55 @@ SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
>    EVT VT = Op.getValueType();
>    DebugLoc DL = Op.getDebugLoc();
>  
> +  // FIXME: This should be factored between R600 and SI
> +  // Standardize Min/Max pattern if applicable
> +  EVT CompareVT = LHS.getValueType();
> +  SDValue Temp;
> +  if ((LHS == True && RHS == False) || (LHS == False && RHS == True)) {
> +    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
> +    switch (CCOpcode) {
> +    case ISD::SETOEQ:
> +    case ISD::SETONE:
> +    case ISD::SETUNE:
> +    case ISD::SETNE:
> +    case ISD::SETUEQ:
> +    case ISD::SETEQ:
> +    case ISD::SETFALSE:
> +    case ISD::SETFALSE2:
> +    case ISD::SETTRUE:
> +    case ISD::SETTRUE2:
> +    case ISD::SETUO:
> +    case ISD::SETO:
> +      assert(0 && "Operation should already be optimised !");
> +    case ISD::SETULE:
> +    case ISD::SETULT:
> +    case ISD::SETOLE:
> +    case ISD::SETOLT:
> +    case ISD::SETLE:
> +    case ISD::SETLT:
> +      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
> +      Temp = True;
> +      True = False;
> +      False = Temp;
> +      break;
> +    case ISD::SETGT:
> +    case ISD::SETGE:
> +    case ISD::SETUGE:
> +    case ISD::SETOGE:
> +    case ISD::SETUGT:
> +    case ISD::SETOGT:
> +      CCOpcode = ISD::SETUGE;
> +      break;
> +    case ISD::SETCC_INVALID:
> +      assert(0 && "Invalid setcc condcode !");
> +    }
> +    return DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
> +        LHS, RHS,
> +        True, False,
> +        DAG.getCondCode(CCOpcode));
> +    return Op;
> +  }
> +
>    SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
>    return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
>  }
> diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
> index 2354b2e..d8c060c 100644
> --- a/lib/Target/AMDGPU/SIInstructions.td
> +++ b/lib/Target/AMDGPU/SIInstructions.td
> @@ -794,11 +794,17 @@ defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
>  //defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>;
>  //defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
>  defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
> -  [(set VReg_32:$dst, (AMDGPUfmin AllReg_32:$src0, VReg_32:$src1))]
> +  [(set VReg_32:$dst,
> +      (selectcc (f32 AllReg_32:$src0), (f32 VReg_32:$src1),
> +      (f32 VReg_32:$src1), (f32 AllReg_32:$src0),
> +      COND_GE))]
>  >;
>  
>  defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
> -  [(set VReg_32:$dst, (AMDGPUfmax AllReg_32:$src0, VReg_32:$src1))]
> +  [(set VReg_32:$dst,
> +      (selectcc (f32 AllReg_32:$src0), (f32 VReg_32:$src1),
> +      (f32 AllReg_32:$src0), (f32 VReg_32:$src1),
> +      COND_GE))]
>  >;
>  defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
>  defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
> -- 
> 1.8.0.1
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev