[Mesa-dev] [PATCH 2/2] radeon/llvm: add a pattern for min/max

Tue Dec 4 15:09:41 PST 2012

---
 lib/Target/AMDGPU/R600ISelLowering.cpp | 47 ++++++++++++++++++++++++++++++++
 lib/Target/AMDGPU/R600Instructions.td  | 12 +++++++--
 lib/Target/AMDGPU/SIISelLowering.cpp   | 49 ++++++++++++++++++++++++++++++++++
 lib/Target/AMDGPU/SIInstructions.td    | 10 +++++--
 4 files changed, 114 insertions(+), 4 deletions(-)

diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 6f1c1d7..1103ff4 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -764,6 +764,53 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
     }
   }
 
+  // Standardize Min/Max pattern if applicable: when the two selected values
+  // are exactly the two compared f32 operands, canonicalize the "less than"
+  // conditions into their inverse with True/False swapped.
+  if (CompareVT == MVT::f32 && VT == MVT::f32 &&
+      ((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
+    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+    switch (CCOpcode) {
+    case ISD::SETOEQ:
+    case ISD::SETONE:
+    case ISD::SETUNE:
+    case ISD::SETNE:
+    case ISD::SETUEQ:
+    case ISD::SETEQ:
+    case ISD::SETFALSE:
+    case ISD::SETFALSE2:
+    case ISD::SETTRUE:
+    case ISD::SETTRUE2:
+    case ISD::SETUO:
+    case ISD::SETO:
+      // NOTE: with NDEBUG the assert vanishes and control falls through.
+      assert(0 && "Operation should already be optimised !");
+    case ISD::SETULE:
+    case ISD::SETULT:
+    case ISD::SETOLE:
+    case ISD::SETOLT:
+    case ISD::SETLE:
+    case ISD::SETLT:
+      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
+      Temp = True;
+      True = False;
+      False = Temp;
+      break;
+    case ISD::SETGT:
+    case ISD::SETGE:
+    case ISD::SETUGE:
+    case ISD::SETOGE:
+    case ISD::SETUGT:
+    case ISD::SETOGT:
+      CCOpcode = ISD::SETUGE;
+      break;
+    case ISD::SETCC_INVALID:
+      assert(0 && "Invalid setcc condcode !");
+    }
+    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False,
+                       DAG.getCondCode(CCOpcode));
+  }
+
   // If we make it this for it means we have no native instructions to handle
   // this SELECT_CC, so we must lower it.
   SDValue HWTrue, HWFalse;
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index d75c32e..43a7552 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -555,8 +555,16 @@ def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
 // Non-IEEE MUL: 0 * anything = 0
 def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", fmul>;
 def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", int_AMDGPU_mul>;
-def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
-def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
+def MAX : R600_2OP <0x3, "MAX",
+  [(set R600_Reg32:$dst,
+  (selectcc (f32 R600_Reg32:$src0), (f32 R600_Reg32:$src1),
+      (f32 R600_Reg32:$src0), (f32 R600_Reg32:$src1),
+      COND_GE))]>; // matches (src0 COND_GE src1) ? src0 : src1
+def MIN : R600_2OP <0x4, "MIN",
+  [(set R600_Reg32:$dst,
+  (selectcc (f32 R600_Reg32:$src0), (f32 R600_Reg32:$src1),
+      (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src0),
+      COND_GE))]>; // matches (src0 COND_GE src1) ? src1 : src0
 
 // For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
 // so some of the instruction names don't match the asm string.
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 45f180f..638c9df 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -383,6 +383,55 @@ SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
   EVT VT = Op.getValueType();
   DebugLoc DL = Op.getDebugLoc();
 
+  // FIXME: This should be factored between R600 and SI
+  // Standardize Min/Max pattern if applicable: only f32 selects whose values
+  // are the compared operands are rewritten; the rest is lowered below.
+  EVT CompareVT = LHS.getValueType();
+  SDValue Temp;
+  if (CompareVT == MVT::f32 && VT == MVT::f32 &&
+      ((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
+    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+    switch (CCOpcode) {
+    case ISD::SETOEQ:
+    case ISD::SETONE:
+    case ISD::SETUNE:
+    case ISD::SETNE:
+    case ISD::SETUEQ:
+    case ISD::SETEQ:
+    case ISD::SETFALSE:
+    case ISD::SETFALSE2:
+    case ISD::SETTRUE:
+    case ISD::SETTRUE2:
+    case ISD::SETUO:
+    case ISD::SETO:
+      // NOTE: with NDEBUG the assert vanishes and control falls through.
+      assert(0 && "Operation should already be optimised !");
+    case ISD::SETULE:
+    case ISD::SETULT:
+    case ISD::SETOLE:
+    case ISD::SETOLT:
+    case ISD::SETLE:
+    case ISD::SETLT:
+      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
+      Temp = True;
+      True = False;
+      False = Temp;
+      break;
+    case ISD::SETGT:
+    case ISD::SETGE:
+    case ISD::SETUGE:
+    case ISD::SETOGE:
+    case ISD::SETUGT:
+    case ISD::SETOGT:
+      CCOpcode = ISD::SETUGE;
+      break;
+    case ISD::SETCC_INVALID:
+      assert(0 && "Invalid setcc condcode !");
+    }
+    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False,
+                       DAG.getCondCode(CCOpcode));
+  }
+
   SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
   return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
 }
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 2354b2e..d8c060c 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -794,11 +794,17 @@ defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
 //defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>;
 //defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
 defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
-  [(set VReg_32:$dst, (AMDGPUfmin AllReg_32:$src0, VReg_32:$src1))]
+  [(set VReg_32:$dst,
+      (selectcc (f32 AllReg_32:$src0), (f32 VReg_32:$src1),
+      (f32 VReg_32:$src1), (f32 AllReg_32:$src0),
+      COND_GE))] // matches (src0 COND_GE src1) ? src1 : src0
 >;
 
 defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
-  [(set VReg_32:$dst, (AMDGPUfmax AllReg_32:$src0, VReg_32:$src1))]
+  [(set VReg_32:$dst,
+      (selectcc (f32 AllReg_32:$src0), (f32 VReg_32:$src1),
+      (f32 AllReg_32:$src0), (f32 VReg_32:$src1),
+      COND_GE))] // matches (src0 COND_GE src1) ? src0 : src1
 >;
 defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
 defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
-- 
1.8.0.1