[Mesa-dev] [PATCH] R600/SI: Use MULADD_IEEE/V_MAD_F32 instruction for mad pattern
Vincent Lejeune
vljn at ovi.com
Sun Feb 10 10:38:51 PST 2013
---
lib/Target/R600/AMDGPUISelLowering.cpp | 10 +++-------
lib/Target/R600/AMDGPUISelLowering.h | 1 -
lib/Target/R600/AMDILISelLowering.cpp | 3 ++-
lib/Target/R600/AMDILInstrInfo.td | 1 -
lib/Target/R600/AMDILIntrinsics.td | 10 ----------
lib/Target/R600/R600Instructions.td | 9 ++++++++-
lib/Target/R600/SIInstructions.td | 4 ++--
test/CodeGen/R600/fmad.ll | 19 +++++++++++++++++++
8 files changed, 34 insertions(+), 23 deletions(-)
create mode 100644 test/CodeGen/R600/fmad.ll
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index d0d23d6..0a33264 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -127,9 +127,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return LowerIntrinsicLRP(Op, DAG);
case AMDGPUIntrinsic::AMDIL_fraction:
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
- case AMDGPUIntrinsic::AMDIL_mad:
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
- Op.getOperand(2), Op.getOperand(3));
case AMDGPUIntrinsic::AMDIL_max:
return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
Op.getOperand(2));
@@ -176,9 +173,9 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
Op.getOperand(1));
SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
Op.getOperand(3));
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
- Op.getOperand(2),
- OneSubAC);
+ return DAG.getNode(ISD::FADD, DL, VT,
+ DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
+ OneSubAC);
}
/// \brief Generate Min/Max node
@@ -393,7 +390,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
// AMDIL DAG nodes
- NODE_NAME_CASE(MAD);
NODE_NAME_CASE(CALL);
NODE_NAME_CASE(UMUL);
NODE_NAME_CASE(DIV_INF);
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index 4b844a3..f27b5db 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -108,7 +108,6 @@ namespace AMDGPUISD {
enum {
// AMDIL ISD Opcodes
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- MAD, // 32bit Fused Multiply Add instruction
CALL, // Function call based on a single integer
UMUL, // 32bit unsigned multiplication
DIV_INF, // Divide with infinity returned on zero divisor
diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp
index 2e60adc..3480ac8 100644
--- a/lib/Target/R600/AMDILISelLowering.cpp
+++ b/lib/Target/R600/AMDILISelLowering.cpp
@@ -451,7 +451,8 @@ AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
// float fr = mad(fqneg, fb, fa);
- SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
+ SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
+ DAG.getNode(ISD::FMUL, DL, FLTTY, fqneg, fb), fa);
// int iq = (int)fq;
SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
diff --git a/lib/Target/R600/AMDILInstrInfo.td b/lib/Target/R600/AMDILInstrInfo.td
index e969bbf..110f147 100644
--- a/lib/Target/R600/AMDILInstrInfo.td
+++ b/lib/Target/R600/AMDILInstrInfo.td
@@ -116,7 +116,6 @@ def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
//===--------------------------------------------------------------------===//
// Floating point math functions
def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
-def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>;
//===----------------------------------------------------------------------===//
// Integer functions
diff --git a/lib/Target/R600/AMDILIntrinsics.td b/lib/Target/R600/AMDILIntrinsics.td
index 3f9e20f..6ec3559 100644
--- a/lib/Target/R600/AMDILIntrinsics.td
+++ b/lib/Target/R600/AMDILIntrinsics.td
@@ -92,12 +92,6 @@ let TargetPrefix = "AMDIL", isTarget = 1 in {
TernaryIntInt;
def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
BinaryIntInt;
- def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
- TernaryIntInt;
- def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
- TernaryIntInt;
- def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">,
- TernaryIntFloat;
def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
BinaryIntInt;
def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
@@ -110,10 +104,6 @@ let TargetPrefix = "AMDIL", isTarget = 1 in {
BinaryIntInt;
def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
BinaryIntInt;
- def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
- TernaryIntInt;
- def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
- TernaryIntInt;
def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
BinaryIntInt;
def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 591f66d..beb9766 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -959,8 +959,13 @@ class MUL_LIT_Common <bits<5> inst> : R600_3OP <
class MULADD_Common <bits<5> inst> : R600_3OP <
inst, "MULADD",
+ []
+>;
+
+class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
+ inst, "MULADD_IEEE",
[(set (f32 R600_Reg32:$dst),
- (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
+ (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))]
>;
class CNDE_Common <bits<5> inst> : R600_3OP <
@@ -1117,6 +1122,7 @@ let Predicates = [isR600] in {
def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
def MULADD_r600 : MULADD_Common<0x10>;
+ def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
def CNDE_r600 : CNDE_Common<0x18>;
def CNDGT_r600 : CNDGT_Common<0x19>;
def CNDGE_r600 : CNDGE_Common<0x1A>;
@@ -1256,6 +1262,7 @@ let Predicates = [isEGorCayman] in {
>;
def MULADD_eg : MULADD_Common<0x14>;
+ def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
def ASHR_eg : ASHR_Common<0x15>;
def LSHR_eg : LSHR_Common<0x16>;
def LSHL_eg : LSHL_Common<0x17>;
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index a09f243..7e50e86 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1423,8 +1423,8 @@ def : Pat <
/********** VOP3 Patterns **********/
/********** ================== **********/
-def : Pat <(f32 (IL_mad AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2)),
- (V_MAD_LEGACY_F32 AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2,
+def : Pat <(f32 (fadd (fmul AllReg_32:$src0, VReg_32:$src1), VReg_32:$src2)),
+ (V_MAD_F32 AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2,
0, 0, 0, 0)>;
/********** ================== **********/
diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll
new file mode 100644
index 0000000..a3d4d0f
--- /dev/null
+++ b/test/CodeGen/R600/fmad.ll
@@ -0,0 +1,19 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: MULADD_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = call float @llvm.R600.load.input(i32 2)
+ %r3 = fmul float %r0, %r1
+ %r4 = fadd float %r3, %r2
+ call void @llvm.AMDGPU.store.output(float %r4, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @fabs(float ) readnone
--
1.8.1.2
More information about the mesa-dev mailing list