[Mesa-dev] [PATCH 2/2] radeon/llvm: add a pattern for min/max
Vincent Lejeune
vljn at ovi.com
Tue Dec 4 15:09:41 PST 2012
---
lib/Target/AMDGPU/R600ISelLowering.cpp | 47 ++++++++++++++++++++++++++++++++
lib/Target/AMDGPU/R600Instructions.td | 12 +++++++--
lib/Target/AMDGPU/SIISelLowering.cpp | 49 ++++++++++++++++++++++++++++++++++
lib/Target/AMDGPU/SIInstructions.td | 10 +++++--
4 files changed, 114 insertions(+), 4 deletions(-)
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 6f1c1d7..1103ff4 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -764,6 +764,53 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
}
}
+ // Standardize Min/Max pattern if applicable.
+ //
+ // A select whose compared operands are also its selected operands, i.e.
+ //   select_cc(a, b, a, b, cc)   or   select_cc(a, b, b, a, cc),
+ // is rewritten so that it carries a "greater" style condition code:
+ //  - less-than style codes are inverted and True/False are swapped, which
+ //    leaves the selected value unchanged;
+ //  - greater-than style codes are all replaced by SETUGE.
+ //
+ // The two types are tested separately on purpose: the chained form
+ // `CompareVT == VT == MVT::f32` parses as `(CompareVT == VT) == MVT::f32`
+ // and compares a bool against the enum value instead of checking the types.
+ if (CompareVT == MVT::f32 && VT == MVT::f32 &&
+     ((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
+   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+   // Cleared for condition codes that do not describe a min/max.
+   bool IsMinMax = true;
+   switch (CCOpcode) {
+   case ISD::SETOEQ:
+   case ISD::SETONE:
+   case ISD::SETUNE:
+   case ISD::SETNE:
+   case ISD::SETUEQ:
+   case ISD::SETEQ:
+   case ISD::SETFALSE:
+   case ISD::SETFALSE2:
+   case ISD::SETTRUE:
+   case ISD::SETTRUE2:
+   case ISD::SETUO:
+   case ISD::SETO:
+     assert(0 && "Operation should already be optimised !");
+     // With asserts compiled out, do not fall through into the swap below;
+     // leave the node to the lowering code that follows this block.
+     IsMinMax = false;
+     break;
+   case ISD::SETULE:
+   case ISD::SETULT:
+   case ISD::SETOLE:
+   case ISD::SETOLT:
+   case ISD::SETLE:
+   case ISD::SETLT:
+     // (a < b ? x : y) is the same as (!(a < b) ? y : x).
+     // CompareVT is f32 here, so the floating point inverse is requested.
+     CCOpcode = ISD::getSetCCInverse(CCOpcode, /*isInteger=*/false);
+     Temp = True;
+     True = False;
+     False = Temp;
+     break;
+   case ISD::SETGT:
+   case ISD::SETGE:
+   case ISD::SETUGE:
+   case ISD::SETOGE:
+   case ISD::SETUGT:
+   case ISD::SETOGT:
+     // NOTE(review): this folds > into >= and drops the ordered/unordered
+     // distinction - confirm that is acceptable for the MIN/MAX patterns.
+     CCOpcode = ISD::SETUGE;
+     break;
+   case ISD::SETCC_INVALID:
+     assert(0 && "Invalid setcc condcode !");
+     IsMinMax = false;
+     break;
+   }
+   if (IsMinMax) {
+     // VT and CompareVT are both f32 at this point.
+     return DAG.getNode(ISD::SELECT_CC, DL, VT,
+                        LHS, RHS,
+                        True, False,
+                        DAG.getCondCode(CCOpcode));
+   }
+ }
+
// If we make it this for it means we have no native instructions to handle
// this SELECT_CC, so we must lower it.
SDValue HWTrue, HWFalse;
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index d75c32e..43a7552 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -555,8 +555,16 @@ def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
// Non-IEEE MUL: 0 * anything = 0
def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", fmul>;
def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", int_AMDGPU_mul>;
-def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
-def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
+def MAX : R600_2OP <0x3, "MAX", // matched from: src0 >= src1 ? src0 : src1
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), (f32 R600_Reg32:$src1), // compared operands
+ (f32 R600_Reg32:$src0), (f32 R600_Reg32:$src1), // value if true, value if false
+ COND_GE))]>;
+def MIN : R600_2OP <0x4, "MIN", // matched from: src0 >= src1 ? src1 : src0
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), (f32 R600_Reg32:$src1), // compared operands
+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src0), // value if true, value if false
+ COND_GE))]>;
// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
// so some of the instruction names don't match the asm string.
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 45f180f..638c9df 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -383,6 +383,55 @@ SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
EVT VT = Op.getValueType();
DebugLoc DL = Op.getDebugLoc();
+ // FIXME: This should be factored between R600 and SI
+ // Standardize Min/Max pattern if applicable.
+ //
+ // A select whose compared operands are also its selected operands, i.e.
+ //   select_cc(a, b, a, b, cc)   or   select_cc(a, b, b, a, cc),
+ // is rewritten so that it carries a "greater" style condition code:
+ //  - less-than style codes are inverted and True/False are swapped, which
+ //    leaves the selected value unchanged;
+ //  - greater-than style codes are all replaced by SETUGE.
+ //
+ // The rewrite is restricted to f32: replacing a signed integer condition
+ // with SETUGE would change its meaning, and every other select must keep
+ // going through the SETCC + SELECT lowering below.
+ EVT CompareVT = LHS.getValueType();
+ SDValue Temp;
+ if (CompareVT == MVT::f32 && VT == MVT::f32 &&
+     ((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
+   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+   // Cleared for condition codes that do not describe a min/max.
+   bool IsMinMax = true;
+   switch (CCOpcode) {
+   case ISD::SETOEQ:
+   case ISD::SETONE:
+   case ISD::SETUNE:
+   case ISD::SETNE:
+   case ISD::SETUEQ:
+   case ISD::SETEQ:
+   case ISD::SETFALSE:
+   case ISD::SETFALSE2:
+   case ISD::SETTRUE:
+   case ISD::SETTRUE2:
+   case ISD::SETUO:
+   case ISD::SETO:
+     assert(0 && "Operation should already be optimised !");
+     // With asserts compiled out, do not fall through into the swap below;
+     // use the SETCC + SELECT lowering that follows this block instead.
+     IsMinMax = false;
+     break;
+   case ISD::SETULE:
+   case ISD::SETULT:
+   case ISD::SETOLE:
+   case ISD::SETOLT:
+   case ISD::SETLE:
+   case ISD::SETLT:
+     // (a < b ? x : y) is the same as (!(a < b) ? y : x).
+     // CompareVT is f32 here, so the floating point inverse is requested.
+     CCOpcode = ISD::getSetCCInverse(CCOpcode, /*isInteger=*/false);
+     Temp = True;
+     True = False;
+     False = Temp;
+     break;
+   case ISD::SETGT:
+   case ISD::SETGE:
+   case ISD::SETUGE:
+   case ISD::SETOGE:
+   case ISD::SETUGT:
+   case ISD::SETOGT:
+     // NOTE(review): this folds > into >= and drops the ordered/unordered
+     // distinction - confirm that is acceptable for the LEGACY min/max ops.
+     CCOpcode = ISD::SETUGE;
+     break;
+   case ISD::SETCC_INVALID:
+     assert(0 && "Invalid setcc condcode !");
+     IsMinMax = false;
+     break;
+   }
+   if (IsMinMax) {
+     // VT and CompareVT are both f32 at this point.
+     return DAG.getNode(ISD::SELECT_CC, DL, VT,
+                        LHS, RHS,
+                        True, False,
+                        DAG.getCondCode(CCOpcode));
+   }
+ }
+
SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 2354b2e..d8c060c 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -794,11 +794,17 @@ defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
//defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>;
//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
- [(set VReg_32:$dst, (AMDGPUfmin AllReg_32:$src0, VReg_32:$src1))]
+ [(set VReg_32:$dst, // matched from: src0 >= src1 ? src1 : src0
+ (selectcc (f32 AllReg_32:$src0), (f32 VReg_32:$src1), // compared operands
+ (f32 VReg_32:$src1), (f32 AllReg_32:$src0), // value if true, value if false
+ COND_GE))]
>;
defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
- [(set VReg_32:$dst, (AMDGPUfmax AllReg_32:$src0, VReg_32:$src1))]
+ [(set VReg_32:$dst, // matched from: src0 >= src1 ? src0 : src1
+ (selectcc (f32 AllReg_32:$src0), (f32 VReg_32:$src1), // compared operands
+ (f32 AllReg_32:$src0), (f32 VReg_32:$src1), // value if true, value if false
+ COND_GE))]
>;
defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
--
1.8.0.1
More information about the mesa-dev
mailing list