<div dir="ltr"><blockquote type="cite">From: Tom Stellard &lt;thomas.stellard at <a href="http://amd.com" target="_blank">amd.com</a>&gt;<br>
<br>
These instructions compare two floating point values and return an<br>
integer true (-1) or false (0) value.<br>
<br>
When compiling code generated by the Mesa GLSL frontend, the SET*_DX10<br>
instructions save us four instructions for most branch decisions that<br>
use floating-point comparisons.<br>
---<br>
lib/Target/R600/R600ISelLowering.cpp | 108 +++++++++++++++++++-------<br>
lib/Target/R600/R600Instructions.td | 52 +++++++++++++<br>
test/CodeGen/R600/fcmp.ll | 4 +-<br>
test/CodeGen/R600/set-dx10.ll | 137 ++++++++++++++++++++++++++++++++++<br>
test/CodeGen/R600/unsuported-cc.ll | 24 +++---<br>
5 files changed, 281 insertions(+), 44 deletions(-)<br>
create mode 100644 test/CodeGen/R600/set-dx10.ll<br>
<br>
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp<br>
index abfee16..c4aa172 100644<br>
--- a/lib/Target/R600/R600ISelLowering.cpp<br>
+++ b/lib/Target/R600/R600ISelLowering.cpp<br>
@@ -90,7 +90,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :<br>
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);<br>
<br>
setTargetDAGCombine(ISD::FP_ROUND);<br>
+ setTargetDAGCombine(ISD::FP_TO_SINT);<br>
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);<br>
+ setTargetDAGCombine(ISD::SELECT_CC);<br>
<br>
setSchedulingPreference(Sched::VLIW);<br>
}<br>
@@ -663,9 +665,12 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const<br>
}<br>
<br>
// Try to lower to a SET* instruction:<br>
- // We need all the operands of SELECT_CC to have the same value type, so if<br>
- // necessary we need to change True and False to be the same type as LHS and<br>
- // RHS, and then convert the result of the select_cc back to the correct type.<br>
+ //<br>
+ // CompareVT == MVT::f32 and VT == MVT::i32 is supported by the hardware,<br>
+ // but for the other case where CompareVT != VT, all operands of<br>
+ // SELECT_CC to have the same value type, so we need to change True and False<br>
</blockquote>
"all operands of SELECT_CC to have". Maybe "need to have"?<br>
<br>
<blockquote type="cite">+ // to be the same type as LHS and RHS, and then convert the result of the<br>
+ // select_cc back to the correct type.<br>
<br>
// Move hardware True/False values to the correct operand.<br>
if (isHWTrueValue(False) && isHWFalseValue(True)) {<br>
@@ -675,32 +680,17 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const<br>
}<br>
<br>
if (isHWTrueValue(True) && isHWFalseValue(False)) {<br>
- if (CompareVT != VT) {<br>
- if (VT == MVT::f32 && CompareVT == MVT::i32) {<br>
- SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,<br>
- LHS, RHS,<br>
- DAG.getConstant(-1, MVT::i32),<br>
- DAG.getConstant(0, MVT::i32),<br>
- CC);<br>
- // Convert integer values of true (-1) and false (0) to fp values of<br>
- // true (1.0f) and false (0.0f).<br>
- SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,<br>
- DAG.getConstant(1, MVT::i32));<br>
- return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);<br>
- } else if (VT == MVT::i32 && CompareVT == MVT::f32) {<br>
- SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,<br>
- LHS, RHS,<br>
- DAG.getConstantFP(1.0f, MVT::f32),<br>
- DAG.getConstantFP(0.0f, MVT::f32),<br>
- CC);<br>
- // Convert fp values of true (1.0f) and false (0.0f) to integer values<br>
- // of true (-1) and false (0).<br>
- SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);<br>
- return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);<br>
- } else {<br>
- // I don't think there will be any other type pairings.<br>
- assert(!"Unhandled operand type parings in SELECT_CC");<br>
- }<br>
+ if (CompareVT != VT && VT == MVT::f32 && CompareVT == MVT::i32) {<br>
+ SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,<br>
+ LHS, RHS,<br>
+ DAG.getConstant(-1, MVT::i32),<br>
+ DAG.getConstant(0, MVT::i32),<br>
+ CC);<br>
+ // Convert integer values of true (-1) and false (0) to fp values of<br>
+ // true (1.0f) and false (0.0f).<br>
+ SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,<br>
+ DAG.getConstant(1, MVT::i32));<br>
+ return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);<br>
} else {<br>
// This SELECT_CC is already legal.<br>
return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);<br>
@@ -1121,6 +1111,35 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,<br>
}<br>
break;<br>
}<br>
+<br>
+ // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) -><br>
+ // (i32 select_cc f32, f32, -1, 0 cc)<br>
+ //<br>
+ // Mesa's GLSL frontend generates the above pattern a lot and we can lower<br>
+ // this to one of the SET*_DX10 instructions.<br>
+ case ISD::FP_TO_SINT: {<br>
+ SDValue FNeg = N->getOperand(0);<br>
+ if (FNeg.getOpcode() != ISD::FNEG) {<br>
+ return SDValue();<br>
+ }<br>
+ SDValue SelectCC = FNeg.getOperand(0);<br>
+ if (SelectCC.getOpcode() != ISD::SELECT_CC ||<br>
+ SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS<br>
+ SelectCC.getOperand(2).getValueType() != MVT::f32 || // True<br>
+ !isHWTrueValue(SelectCC.getOperand(2)) ||<br>
+ !isHWFalseValue(SelectCC.getOperand(3))) {<br>
+ return SDValue();<br>
+ }<br>
+<br>
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N->getValueType(0),<br>
+ SelectCC.getOperand(0), // LHS<br>
+ SelectCC.getOperand(1), // RHS<br>
+ DAG.getConstant(-1, MVT::i32), // True<br>
+ DAG.getConstant(0, MVT::i32), // False<br>
+ SelectCC.getOperand(4)); // CC<br>
+<br>
+ break;<br>
+ }<br>
// Extract_vec (Build_vector) generated by custom lowering<br>
// also needs to be customly combined<br>
case ISD::EXTRACT_VECTOR_ELT: {<br>
@@ -1140,6 +1159,37 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,<br>
}<br>
}<br>
}<br>
+<br>
+ case ISD::SELECT_CC: {<br>
+ // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq -><br>
+ // selectcc x, y, a, b, inv(cc)<br>
+ SDValue LHS = N->getOperand(0);<br>
+ if (LHS.getOpcode() != ISD::SELECT_CC) {<br>
+ return SDValue();<br>
+ }<br>
+<br>
+ SDValue RHS = N->getOperand(1);<br>
+ SDValue True = N->getOperand(2);<br>
+ SDValue False = N->getOperand(3);<br>
+<br>
+ if (LHS.getOperand(2).getNode() != True.getNode() ||<br>
+ LHS.getOperand(3).getNode() != False.getNode() ||<br>
+ RHS.getNode() != False.getNode() ||<br>
+ cast&lt;CondCodeSDNode&gt;(N->getOperand(4))->get() != ISD::SETEQ) {<br>
+ return SDValue();<br>
+ }<br>
+<br>
+ ISD::CondCode CCOpcode = cast&lt;CondCodeSDNode&gt;(LHS->getOperand(4))->get();<br>
+ CCOpcode = ISD::getSetCCInverse(<br>
+ CCOpcode, LHS.getOperand(0).getValueType().isInteger());<br>
+ return DAG.getSelectCC(N->getDebugLoc(),<br>
+ LHS.getOperand(0),<br>
+ LHS.getOperand(1),<br>
+ LHS.getOperand(2),<br>
+ LHS.getOperand(3),<br>
+ CCOpcode);<br>
+<br>
+ }<br>
}<br>
return SDValue();<br>
}<br>
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td<br>
index 75ac31f..a71434a 100644<br>
--- a/lib/Target/R600/R600Instructions.td<br>
+++ b/lib/Target/R600/R600Instructions.td<br>
@@ -711,6 +711,34 @@ def SNE : R600_2OP <<br>
COND_NE))]<br>
>;<br>
<br>
+def SETE_DX10 : R600_2OP <<br>
+ 0xC, "SETE_DX10",<br>
+ [(set R600_Reg32:$dst,<br>
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),<br>
+ COND_EQ))]<br>
+>;<br>
+<br>
+def SETGT_DX10 : R600_2OP <<br>
+ 0xD, "SETGT_DX10",<br>
+ [(set R600_Reg32:$dst,<br>
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),<br>
+ COND_GT))]<br>
+>;<br>
+<br>
+def SETGE_DX10 : R600_2OP <<br>
+ 0xE, "SETGE_DX10",<br>
+ [(set R600_Reg32:$dst,<br>
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),<br>
+ COND_GE))]<br>
+>;<br>
+<br>
+def SETNE_DX10 : R600_2OP <<br>
+ 0xF, "SETNE_DX10",<br>
+ [(set R600_Reg32:$dst,<br>
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),<br>
+ COND_NE))]<br>
+>;<br>
+<br>
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;<br>
def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;<br>
def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;<br>
@@ -1771,6 +1799,18 @@ def : Pat <<br>
(SGE R600_Reg32:$src1, R600_Reg32:$src0) <br>
>;<br>
<br>
+// SETGT_DX10 reverse args<br>
+def : Pat <<br>
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT),<br>
+ (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0)<br>
+>;<br>
+<br>
+// SETGE_DX10 reverse args<br>
+def : Pat <<br>
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE),<br>
+ (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0)<br>
+>;<br>
+<br>
// SETGT_INT reverse args<br>
def : Pat <<br>
(selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),<br>
@@ -1809,12 +1849,24 @@ def : Pat <<br>
(SETE R600_Reg32:$src0, R600_Reg32:$src1)<br>
>;<br>
<br>
+//SETE_DX10 - 'true if ordered'<br>
+def : Pat <<br>
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO),<br>
+ (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)<br>
+>;<br>
+<br>
//SNE - 'true if unordered'<br>
def : Pat <<br>
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),<br>
(SNE R600_Reg32:$src0, R600_Reg32:$src1)<br>
>;<br>
<br>
+//SETNE_DX10 - 'true if unordered'<br>
+def : Pat <<br>
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO),<br>
+ (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)<br>
+>;<br>
+<br>
def : Extract_Element &lt;f32, v4f32, R600_Reg128, 0, sel_x&gt;;<br>
def : Extract_Element &lt;f32, v4f32, R600_Reg128, 1, sel_y&gt;;<br>
def : Extract_Element &lt;f32, v4f32, R600_Reg128, 2, sel_z&gt;;<br>
diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll<br>
index 1dcd07c..89f5e9e 100644<br>
--- a/test/CodeGen/R600/fcmp.ll<br>
+++ b/test/CodeGen/R600/fcmp.ll<br>
@@ -1,8 +1,6 @@<br>
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s<br>
<br>
-;CHECK: SETE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-;CHECK: MOV T{{[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}<br>
-;CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+;CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
<br>
define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {<br>
entry:<br>
diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll<br>
new file mode 100644<br>
index 0000000..a9eb22f<br>
--- /dev/null<br>
+++ b/test/CodeGen/R600/set-dx10.ll<br>
@@ -0,0 +1,137 @@<br>
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s<br>
+<br>
+; These tests check that floating point comparisons which are used by select<br>
+; to store integer true (-1) and false (0) values are lowered one of the<br>
</blockquote>
<br>
"lowered one of the " -> "lowered to one of the"<br>
<br>
<blockquote type="cite">+; SET*DX10 instructions.<br>
+<br>
+; CHECK: @fcmp_une_select_fptosi<br>
+; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)<br>
+define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {<br>
+entry:<br>
+ %0 = fcmp une float %in, 5.0<br>
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00<br>
+ %2 = fsub float -0.000000e+00, %1<br>
+ %3 = fptosi float %2 to i32<br>
+ store i32 %3, i32 addrspace(1)* %out<br>
+ ret void<br>
+}<br>
+<br>
+; CHECK: @fcmp_une_select_i32<br>
+; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)<br>
+define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {<br>
+entry:<br>
+ %0 = fcmp une float %in, 5.0<br>
+ %1 = select i1 %0, i32 -1, i32 0<br>
+ store i32 %1, i32 addrspace(1)* %out<br>
+ ret void<br>
+}<br>
+<br>
+; CHECK: @fcmp_ueq_select_fptosi<br>
+; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)<br>
+define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {<br>
+entry:<br>
+ %0 = fcmp ueq float %in, 5.0<br>
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00<br>
+ %2 = fsub float -0.000000e+00, %1<br>
+ %3 = fptosi float %2 to i32<br>
+ store i32 %3, i32 addrspace(1)* %out<br>
+ ret void<br>
+}<br>
+<br>
+; CHECK: @fcmp_ueq_select_i32<br>
+; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)<br>
+define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {<br>
+entry:<br>
+ %0 = fcmp ueq float %in, 5.0<br>
+ %1 = select i1 %0, i32 -1, i32 0<br>
+ store i32 %1, i32 addrspace(1)* %out<br>
+ ret void<br>
+}<br>
+<br>
+; CHECK: @fcmp_ugt_select_fptosi<br>
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)<br>
+define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {<br>
+entry:<br>
+ %0 = fcmp ugt float %in, 5.0<br>
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00<br>
+ %2 = fsub float -0.000000e+00, %1<br>
+ %3 = fptosi float %2 to i32<br>
+ store i32 %3, i32 addrspace(1)* %out<br>
+ ret void<br>
+}<br>
+<br>
+; CHECK: @fcmp_ugt_select_i32<br>
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)<br>
+define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {<br>
+entry:<br>
+ %0 = fcmp ugt float %in, 5.0<br>
+ %1 = select i1 %0, i32 -1, i32 0<br>
+ store i32 %1, i32 addrspace(1)* %out<br>
+ ret void<br>
+}<br>
+<br>
+; CHECK: @fcmp_uge_select_fptosi<br>
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)<br>
+define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {<br>
+entry:<br>
+ %0 = fcmp uge float %in, 5.0<br>
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00<br>
+ %2 = fsub float -0.000000e+00, %1<br>
+ %3 = fptosi float %2 to i32<br>
+ store i32 %3, i32 addrspace(1)* %out<br>
+ ret void<br>
+}<br>
+<br>
+; CHECK: @fcmp_uge_select_i32<br>
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)<br>
+define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {<br>
+entry:<br>
+ %0 = fcmp uge float %in, 5.0<br>
+ %1 = select i1 %0, i32 -1, i32 0<br>
+ store i32 %1, i32 addrspace(1)* %out<br>
+ ret void<br>
+}<br>
+<br>
+; CHECK: @fcmp_ule_select_fptosi<br>
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)<br>
+define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {<br>
+entry:<br>
+ %0 = fcmp ule float %in, 5.0<br>
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00<br>
+ %2 = fsub float -0.000000e+00, %1<br>
+ %3 = fptosi float %2 to i32<br>
+ store i32 %3, i32 addrspace(1)* %out<br>
+ ret void<br>
+}<br>
+<br>
+; CHECK: @fcmp_ule_select_i32<br>
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)<br>
+define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {<br>
+entry:<br>
+ %0 = fcmp ule float %in, 5.0<br>
+ %1 = select i1 %0, i32 -1, i32 0<br>
+ store i32 %1, i32 addrspace(1)* %out<br>
+ ret void<br>
+}<br>
+<br>
+; CHECK: @fcmp_ult_select_fptosi<br>
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)<br>
+define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {<br>
+entry:<br>
+ %0 = fcmp ult float %in, 5.0<br>
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00<br>
+ %2 = fsub float -0.000000e+00, %1<br>
+ %3 = fptosi float %2 to i32<br>
+ store i32 %3, i32 addrspace(1)* %out<br>
+ ret void<br>
+}<br>
+<br>
+; CHECK: @fcmp_ult_select_i32<br>
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)<br>
+define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {<br>
+entry:<br>
+ %0 = fcmp ult float %in, 5.0<br>
+ %1 = select i1 %0, i32 -1, i32 0<br>
+ store i32 %1, i32 addrspace(1)* %out<br>
+ ret void<br>
+}<br>
diff --git a/test/CodeGen/R600/unsuported-cc.ll b/test/CodeGen/R600/unsuported-cc.ll<br>
index 5d4c782..b48c591 100644<br>
--- a/test/CodeGen/R600/unsuported-cc.ll<br>
+++ b/test/CodeGen/R600/unsuported-cc.ll<br>
</blockquote>
<br>
Not related to this patch, but should we rename this file to unsupported-cc.ll (with two p's)?<br>
<br>
I'm not qualified to do a technical review of this, so all you get is spelling and grammar :)<br>
<br>
--Aaron W<br>
<br>
<blockquote type="cite">@@ -24,21 +24,21 @@ entry:<br>
<br>
; CHECK: @ult_float<br>
; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)<br>
-define void @ult_float(i32 addrspace(1)* %out, float %in) {<br>
+define void @ult_float(float addrspace(1)* %out, float %in) {<br>
entry:<br>
%0 = fcmp ult float %in, 5.0<br>
- %1 = select i1 %0, i32 -1, i32 0<br>
- store i32 %1, i32 addrspace(1)* %out<br>
+ %1 = select i1 %0, float 1.0, float 0.0<br>
+ store float %1, float addrspace(1)* %out<br>
ret void<br>
}<br>
<br>
; CHECK: @olt<br>
; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)<br>
-define void @olt(i32 addrspace(1)* %out, float %in) {<br>
+define void @olt(float addrspace(1)* %out, float %in) {<br>
entry:<br>
%0 = fcmp olt float %in, 5.0<br>
- %1 = select i1 %0, i32 -1, i32 0<br>
- store i32 %1, i32 addrspace(1)* %out<br>
+ %1 = select i1 %0, float 1.0, float 0.0<br>
+ store float %1, float addrspace(1)* %out<br>
ret void<br>
}<br>
<br>
@@ -64,20 +64,20 @@ entry:<br>
<br>
; CHECK: @ule_float<br>
; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)<br>
-define void @ule_float(i32 addrspace(1)* %out, float %in) {<br>
+define void @ule_float(float addrspace(1)* %out, float %in) {<br>
entry:<br>
%0 = fcmp ule float %in, 5.0<br>
- %1 = select i1 %0, i32 -1, i32 0<br>
- store i32 %1, i32 addrspace(1)* %out<br>
+ %1 = select i1 %0, float 1.0, float 0.0<br>
+ store float %1, float addrspace(1)* %out<br>
ret void<br>
}<br>
<br>
; CHECK: @ole<br>
; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)<br>
-define void @ole(i32 addrspace(1)* %out, float %in) {<br>
+define void @ole(float addrspace(1)* %out, float %in) {<br>
entry:<br>
%0 = fcmp ole float %in, 5.0<br>
- %1 = select i1 %0, i32 -1, i32 0<br>
- store i32 %1, i32 addrspace(1)* %out<br>
+ %1 = select i1 %0, float 1.0, float 0.0<br>
+ store float %1, float addrspace(1)* %out<br>
ret void<br>
}<br>
-- <br>
1.7.8.6<br>
<br>
</blockquote>
</div>