[Mesa-dev] [PATCH 3/3] R600: Add support for SET*_DX10 instructions
Aaron Watry
awatry at gmail.com
Thu Jan 31 17:49:41 PST 2013
From: Tom Stellard <thomas.stellard at amd.com>
These instructions compare two floating point values and return an
integer true (-1) or false (0) value.
When compiling code generated by the Mesa GLSL frontend, the SET*_DX10
instructions save us four instructions for most branch decisions that
use floating-point comparisons.
---
lib/Target/R600/R600ISelLowering.cpp | 108 +++++++++++++++++++-------
lib/Target/R600/R600Instructions.td | 52 +++++++++++++
test/CodeGen/R600/fcmp.ll | 4 +-
test/CodeGen/R600/set-dx10.ll | 137
++++++++++++++++++++++++++++++++++
test/CodeGen/R600/unsuported-cc.ll | 24 +++---
5 files changed, 281 insertions(+), 44 deletions(-)
create mode 100644 test/CodeGen/R600/set-dx10.ll
diff --git a/lib/Target/R600/R600ISelLowering.cpp
b/lib/Target/R600/R600ISelLowering.cpp
index abfee16..c4aa172 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -90,7 +90,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM)
:
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::SELECT_CC);
setSchedulingPreference(Sched::VLIW);
}
@@ -663,9 +665,12 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op,
SelectionDAG &DAG) const
}
// Try to lower to a SET* instruction:
- // We need all the operands of SELECT_CC to have the same value type, so
if
- // necessary we need to change True and False to be the same type as LHS
and
- // RHS, and then convert the result of the select_cc back to the correct
type.
+ //
+ // CompareVT == MVT::f32 and VT == MVT::i32 is supported by the hardware,
+ // but for the other case where CompareVT != VT, all operands of
+ // SELECT_CC to have the same value type, so we need to change True and
False
"all operands of SELECT_CC to have". Maybe "need to have"?
+ // to be the same type as LHS and RHS, and then convert the result of the
+ // select_cc back to the correct type.
// Move hardware True/False values to the correct operand.
if (isHWTrueValue(False) && isHWFalseValue(True)) {
@@ -675,32 +680,17 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue
Op, SelectionDAG &DAG) const
}
if (isHWTrueValue(True) && isHWFalseValue(False)) {
- if (CompareVT != VT) {
- if (VT == MVT::f32 && CompareVT == MVT::i32) {
- SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
- LHS, RHS,
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- CC);
- // Convert integer values of true (-1) and false (0) to fp values
of
- // true (1.0f) and false (0.0f).
- SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
- DAG.getConstant(1,
MVT::i32));
- return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
- } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
- SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
- LHS, RHS,
- DAG.getConstantFP(1.0f, MVT::f32),
- DAG.getConstantFP(0.0f, MVT::f32),
- CC);
- // Convert fp values of true (1.0f) and false (0.0f) to integer
values
- // of true (-1) and false (0).
- SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
- return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
- } else {
- // I don't think there will be any other type pairings.
- assert(!"Unhandled operand type parings in SELECT_CC");
- }
+ if (CompareVT != VT && VT == MVT::f32 && CompareVT == MVT::i32) {
+ SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
+ LHS, RHS,
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ CC);
+ // Convert integer values of true (-1) and false (0) to fp values of
+ // true (1.0f) and false (0.0f).
+ SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
+ DAG.getConstant(1,
MVT::i32));
+ return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
} else {
// This SELECT_CC is already legal.
return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False,
CC);
@@ -1121,6 +1111,35 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode
*N,
}
break;
}
+
+ // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
+ // (i32 select_cc f32, f32, -1, 0 cc)
+ //
+ // Mesa's GLSL frontend generates the above pattern a lot and we can
lower
+ // this to one of the SET*_DX10 instructions.
+ case ISD::FP_TO_SINT: {
+ SDValue FNeg = N->getOperand(0);
+ if (FNeg.getOpcode() != ISD::FNEG) {
+ return SDValue();
+ }
+ SDValue SelectCC = FNeg.getOperand(0);
+ if (SelectCC.getOpcode() != ISD::SELECT_CC ||
+ SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
+ SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
+ !isHWTrueValue(SelectCC.getOperand(2)) ||
+ !isHWFalseValue(SelectCC.getOperand(3))) {
+ return SDValue();
+ }
+
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
N->getValueType(0),
+ SelectCC.getOperand(0), // LHS
+ SelectCC.getOperand(1), // RHS
+ DAG.getConstant(-1, MVT::i32), // True
+ DAG.getConstant(0, MVT::i32), // Flase
+ SelectCC.getOperand(4)); // CC
+
+ break;
+ }
// Extract_vec (Build_vector) generated by custom lowering
// also needs to be customly combined
case ISD::EXTRACT_VECTOR_ELT: {
@@ -1140,6 +1159,37 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode
*N,
}
}
}
+
+ case ISD::SELECT_CC: {
+ // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
+ // selectcc x, y, a, b, inv(cc)
+ SDValue LHS = N->getOperand(0);
+ if (LHS.getOpcode() != ISD::SELECT_CC) {
+ return SDValue();
+ }
+
+ SDValue RHS = N->getOperand(1);
+ SDValue True = N->getOperand(2);
+ SDValue False = N->getOperand(3);
+
+ if (LHS.getOperand(2).getNode() != True.getNode() ||
+ LHS.getOperand(3).getNode() != False.getNode() ||
+ RHS.getNode() != False.getNode() ||
+ cast<CondCodeSDNode>(N->getOperand(4))->get() != ISD::SETEQ) {
+ return SDValue();
+ }
+
+ ISD::CondCode CCOpcode =
cast<CondCodeSDNode>(LHS->getOperand(4))->get();
+ CCOpcode = ISD::getSetCCInverse(
+ CCOpcode,
LHS.getOperand(0).getValueType().isInteger());
+ return DAG.getSelectCC(N->getDebugLoc(),
+ LHS.getOperand(0),
+ LHS.getOperand(1),
+ LHS.getOperand(2),
+ LHS.getOperand(3),
+ CCOpcode);
+
+ }
}
return SDValue();
}
diff --git a/lib/Target/R600/R600Instructions.td
b/lib/Target/R600/R600Instructions.td
index 75ac31f..a71434a 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -711,6 +711,34 @@ def SNE : R600_2OP <
COND_NE))]
>;
+def SETE_DX10 : R600_2OP <
+ 0xC, "SETE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_EQ))]
+>;
+
+def SETGT_DX10 : R600_2OP <
+ 0xD, "SETGT_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_GT))]
+>;
+
+def SETGE_DX10 : R600_2OP <
+ 0xE, "SETGE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_GE))]
+>;
+
+def SETNE_DX10 : R600_2OP <
+ 0xF, "SETNE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_NE))]
+>;
+
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
@@ -1771,6 +1799,18 @@ def : Pat <
(SGE R600_Reg32:$src1, R600_Reg32:$src0)
>;
+// SETGT_DX10 reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT),
+ (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_DX10 reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE),
+ (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
// SETGT_INT reverse args
def : Pat <
(selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
@@ -1809,12 +1849,24 @@ def : Pat <
(SETE R600_Reg32:$src0, R600_Reg32:$src1)
>;
+//SETE_DX10 - 'true if ordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO),
+ (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
//SNE - 'true if unordered'
def : Pat <
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
SETUO),
(SNE R600_Reg32:$src0, R600_Reg32:$src1)
>;
+//SETNE_DX10 - 'true if ordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO),
+ (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>;
def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>;
def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>;
diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll
index 1dcd07c..89f5e9e 100644
--- a/test/CodeGen/R600/fcmp.ll
+++ b/test/CodeGen/R600/fcmp.ll
@@ -1,8 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-;CHECK: SETE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MOV T{{[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
-;CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
entry:
diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll
new file mode 100644
index 0000000..a9eb22f
--- /dev/null
+++ b/test/CodeGen/R600/set-dx10.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; These tests check that floating point comparisons which are used by
select
+; to store integer true (-1) and false (0) values are lowered one of the
"lowered one of the " -> "lowered to one of the"
+; SET*DX10 instructions.
+
+; CHECK: @fcmp_une_select_fptosi
+; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
1084227584(5.000000e+00)
+define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp une float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_une_select_i32
+; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
1084227584(5.000000e+00)
+define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp une float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ueq_select_fptosi
+; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
1084227584(5.000000e+00)
+define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ueq float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ueq_select_i32
+; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
1084227584(5.000000e+00)
+define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ueq float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ugt_select_fptosi
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
1084227584(5.000000e+00)
+define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ugt float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ugt_select_i32
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
1084227584(5.000000e+00)
+define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ugt float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_uge_select_fptosi
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
1084227584(5.000000e+00)
+define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp uge float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_uge_select_i32
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
1084227584(5.000000e+00)
+define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp uge float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ule_select_fptosi
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
1084227584(5.000000e+00)
+define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ule float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ule_select_i32
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
1084227584(5.000000e+00)
+define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ule float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ult_select_fptosi
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
1084227584(5.000000e+00)
+define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ult float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ult_select_i32
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
1084227584(5.000000e+00)
+define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ult float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/unsuported-cc.ll
b/test/CodeGen/R600/unsuported-cc.ll
index 5d4c782..b48c591 100644
--- a/test/CodeGen/R600/unsuported-cc.ll
+++ b/test/CodeGen/R600/unsuported-cc.ll
Not related to this patch, but should we rename this file to
unsupported-cc.ll (2 p's).
I'm not qualified to do a technical review of this, so all you get is
spelling and grammar :)
--Aaron W
@@ -24,21 +24,21 @@ entry:
; CHECK: @ult_float
; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
1084227584(5.000000e+00)
-define void @ult_float(i32 addrspace(1)* %out, float %in) {
+define void @ult_float(float addrspace(1)* %out, float %in) {
entry:
%0 = fcmp ult float %in, 5.0
- %1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
+ %1 = select i1 %0, float 1.0, float 0.0
+ store float %1, float addrspace(1)* %out
ret void
}
; CHECK: @olt
; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
1084227584(5.000000e+00)
-define void @olt(i32 addrspace(1)* %out, float %in) {
+define void @olt(float addrspace(1)* %out, float %in) {
entry:
%0 = fcmp olt float %in, 5.0
- %1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
+ %1 = select i1 %0, float 1.0, float 0.0
+ store float %1, float addrspace(1)* %out
ret void
}
@@ -64,20 +64,20 @@ entry:
; CHECK: @ule_float
; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
1084227584(5.000000e+00)
-define void @ule_float(i32 addrspace(1)* %out, float %in) {
+define void @ule_float(float addrspace(1)* %out, float %in) {
entry:
%0 = fcmp ule float %in, 5.0
- %1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
+ %1 = select i1 %0, float 1.0, float 0.0
+ store float %1, float addrspace(1)* %out
ret void
}
; CHECK: @ole
; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
1084227584(5.000000e+00)
-define void @ole(i32 addrspace(1)* %out, float %in) {
+define void @ole(float addrspace(1)* %out, float %in) {
entry:
%0 = fcmp ole float %in, 5.0
- %1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
+ %1 = select i1 %0, float 1.0, float 0.0
+ store float %1, float addrspace(1)* %out
ret void
}
--
1.7.8.6
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130131/5b642fba/attachment-0001.html>
More information about the mesa-dev
mailing list