Mesa (master): radeon/llvm: improve select_cc lowering to generate CND* more often

Wed Sep 26 23:44:05 UTC 2012

Module: Mesa
Branch: master
Commit: ff947c6d65830b7be6e9fcbfe666fa7dba6341f6
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ff947c6d65830b7be6e9fcbfe666fa7dba6341f6

Author: Vincent Lejeune <vljn at ovi.com>
Date:   Mon Sep 24 16:04:26 2012 +0200

radeon/llvm: improve select_cc lowering to generate CND* more often

v2: - Simplify isZero()
    - Remove a unused function prototype
    - Clean whitespace trails

Reviewed-by: Tom Stellard <thomas.stellard at amd.com>

---

 src/gallium/drivers/r600/r600_llvm.c            |   15 ++++
 src/gallium/drivers/radeon/R600ISelLowering.cpp |   89 ++++++++++++++---------
 src/gallium/drivers/radeon/R600ISelLowering.h   |    2 +
 src/gallium/drivers/radeon/R600Instructions.td  |   38 ++++++++--
 4 files changed, 103 insertions(+), 41 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index dd0a714..71ea578 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -165,6 +165,20 @@ static void llvm_emit_tex(
 					emit_data->dst_type, args, c, LLVMReadNoneAttribute);
 }
 
+static void emit_cndlt(
+		const struct lp_build_tgsi_action * action,
+		struct lp_build_tgsi_context * bld_base,
+		struct lp_build_emit_data * emit_data)
+{
+	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+	LLVMValueRef float_zero = lp_build_const_float(
+		bld_base->base.gallivm, 0.0f);
+	LLVMValueRef cmp = LLVMBuildFCmp(
+		builder, LLVMRealULT, emit_data->args[0], float_zero, "");
+	emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
+		cmp, emit_data->args[1], emit_data->args[2], "");
+}
+
 static void dp_fetch_args(
 	struct lp_build_tgsi_context * bld_base,
 	struct lp_build_emit_data * emit_data)
@@ -241,6 +255,7 @@ LLVMModuleRef r600_tgsi_llvm(
 	bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex;
 	bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
 	bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex;
+	bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cndlt;
 
 	lp_build_tgsi_llvm(bld_base, tokens);
 
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp
index 2fc9c67..5dd2f53 100644
--- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
+++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
@@ -516,6 +516,17 @@ SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
                                  Op.getOperand(1)));
 }
 
+bool R600TargetLowering::isZero(SDValue Op) const
+{
+  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
+    return Cst->isNullValue();
+  } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
+    return CstFP->isZero();
+  } else {
+    return false;
+  }
+}
+
 SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
 {
   DebugLoc DL = Op.getDebugLoc();
@@ -568,47 +579,58 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
   if (isHWTrueValue(False) && isHWFalseValue(True)) {
   }
 
-  // XXX Check if we can lower this to a SELECT or if it is supported by a native
-  // operation. (The code below does this but we don't have the Instruction
-  // selection patterns to do this yet.
-#if 0
+  // Check if we can lower this to a native operation.
+  // CND* instructions requires all operands to have the same type,
+  // and RHS to be zero.
+
   if (isZero(LHS) || isZero(RHS)) {
     SDValue Cond = (isZero(LHS) ? RHS : LHS);
-    bool SwapTF = false;
+    SDValue Zero = (isZero(LHS) ? LHS : RHS);
+    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+    if (CompareVT != VT) {
+      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
+      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
+    }
+    if (isZero(LHS)) {
+      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
+    }
+
     switch (CCOpcode) {
-    case ISD::SETOEQ:
-    case ISD::SETUEQ:
-    case ISD::SETEQ:
-      SwapTF = true;
-      // Fall through
     case ISD::SETONE:
     case ISD::SETUNE:
     case ISD::SETNE:
-      // We can lower to select
-      if (SwapTF) {
-        Temp = True;
-        True = False;
-        False = Temp;
-      }
-      // CNDE
-      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
+    case ISD::SETULE:
+    case ISD::SETULT:
+    case ISD::SETOLE:
+    case ISD::SETOLT:
+    case ISD::SETLE:
+    case ISD::SETLT:
+      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
+      Temp = True;
+      True = False;
+      False = Temp;
+      break;
     default:
-      // Supported by a native operation (CNDGE, CNDGT)
-      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
+      break;
     }
+    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
+        Cond, Zero,
+        True, False,
+        DAG.getCondCode(CCOpcode));
+    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
   }
-#endif
+
 
   // If we make it this for it means we have no native instructions to handle
   // this SELECT_CC, so we must lower it.
   SDValue HWTrue, HWFalse;
 
-  if (VT == MVT::f32) {
-    HWTrue = DAG.getConstantFP(1.0f, VT);
-    HWFalse = DAG.getConstantFP(0.0f, VT);
-  } else if (VT == MVT::i32) {
-    HWTrue = DAG.getConstant(-1, VT);
-    HWFalse = DAG.getConstant(0, VT);
+  if (CompareVT == MVT::f32) {
+    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
+    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
+  } else if (CompareVT == MVT::i32) {
+    HWTrue = DAG.getConstant(-1, CompareVT);
+    HWFalse = DAG.getConstant(0, CompareVT);
   }
   else {
     assert(!"Unhandled value type in LowerSELECT_CC");
@@ -616,15 +638,12 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
 
   // Lower this unsupported SELECT_CC into a combination of two supported
   // SELECT_CC operations.
-  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);
-
-  // Convert floating point condition to i1
-  if (VT == MVT::f32) {
-    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
-                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
-  }
+  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
 
-  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
+  return DAG.getNode(ISD::SELECT_CC, DL, VT,
+      Cond, HWFalse,
+      True, False,
+      DAG.getCondCode(ISD::SETNE));
 }
 
 SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.h b/src/gallium/drivers/radeon/R600ISelLowering.h
index 7b9c27e..7df2dd1 100644
--- a/src/gallium/drivers/radeon/R600ISelLowering.h
+++ b/src/gallium/drivers/radeon/R600ISelLowering.h
@@ -60,6 +60,8 @@ private:
   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const;
   SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
+  
+  bool isZero(SDValue Op) const;
 };
 
 } // End namespace llvm;
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
index feb97fa..1689a2f 100644
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -545,7 +545,25 @@ def SETGE_UINT : R600_2OP <
 def CNDE_INT : R600_3OP <
 	0x1C, "CNDE_INT",
   [(set (i32 R600_Reg32:$dst),
-   (select R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
+   (selectcc (i32 R600_Reg32:$src0), 0,
+       (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
+       COND_EQ))]
+>;
+
+def CNDGE_INT : R600_3OP <
+	0x1E, "CNDGE_INT",
+  [(set (i32 R600_Reg32:$dst),
+   (selectcc (i32 R600_Reg32:$src0), 0,
+       (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
+       COND_GE))]
+>;
+
+def CNDGT_INT : R600_3OP <
+	0x1D, "CNDGT_INT",
+  [(set (i32 R600_Reg32:$dst),
+   (selectcc (i32 R600_Reg32:$src0), 0,
+       (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
+       COND_GT))]
 >;
 
 //===----------------------------------------------------------------------===//
@@ -642,18 +660,26 @@ class MULADD_Common <bits<11> inst> : R600_3OP <
 
 class CNDE_Common <bits<11> inst> : R600_3OP <
   inst, "CNDE",
-  [(set (f32 R600_Reg32:$dst),
-   (select (i32 (fp_to_sint (fneg R600_Reg32:$src0))), (f32 R600_Reg32:$src2), (f32 R600_Reg32:$src1)))]
+  [(set R600_Reg32:$dst,
+   (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
+       (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
+       COND_EQ))]
 >;
 
 class CNDGT_Common <bits<11> inst> : R600_3OP <
   inst, "CNDGT",
-  []
+  [(set R600_Reg32:$dst,
+   (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
+       (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
+       COND_GT))]
 >;
-  
+
 class CNDGE_Common <bits<11> inst> : R600_3OP <
   inst, "CNDGE",
-  [(set R600_Reg32:$dst, (int_AMDGPU_cndlt R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
+  [(set R600_Reg32:$dst,
+   (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
+       (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
+       COND_GE))]
 >;
 
 class DOT4_Common <bits<11> inst> : R600_REDUCTION <