[Mesa-dev] [PATCH 5/5] R600: Fold immediates into ALU instructions when possible v2
Tom Stellard
tom at stellard.net
Wed Nov 28 14:50:11 PST 2012
From: Tom Stellard <thomas.stellard at amd.com>
v2:
- Fold the immediates using the SelectionDAG
---
lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp | 94 +++++++++++++++++++++++++
lib/Target/AMDGPU/R600InstrInfo.cpp | 16 ++++-
lib/Target/AMDGPU/R600InstrInfo.h | 7 ++
test/CodeGen/R600/fcmp-cnd.ll | 4 +-
test/CodeGen/R600/fcmp-cnde-int-args.ll | 2 +-
test/CodeGen/R600/literals.ll | 30 ++++++++
test/CodeGen/R600/selectcc-icmp-select-float.ll | 2 +-
test/CodeGen/R600/selectcc_cnde.ll | 2 +-
test/CodeGen/R600/selectcc_cnde_int.ll | 2 +-
9 files changed, 153 insertions(+), 6 deletions(-)
create mode 100644 test/CodeGen/R600/literals.ll
diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
index 10ce6ad..2a80f1b 100644
--- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
@@ -14,6 +14,7 @@
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
#include "AMDILDevices.h"
+#include "R600InstrInfo.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
@@ -167,6 +168,99 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
}
break;
+ case ISD::ConstantFP:
+ case ISD::Constant:
+ {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ // XXX: Custom immediate lowering not implemented yet. Instead we use
+ // pseudo instructions defined in SIInstructions.td
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ break;
+ }
+ const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+
+ uint64_t ImmValue = 0;
+ unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
+
+ if (N->getOpcode() == ISD::ConstantFP) {
+ // XXX: 64-bit Immediates not supported yet
+ assert(N->getValueType(0) != MVT::f64);
+
+ ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
+ APFloat Value = C->getValueAPF();
+ float FloatValue = Value.convertToFloat();
+ if (FloatValue == 0.0) {
+ ImmReg = AMDGPU::ZERO;
+ } else if (FloatValue == 0.5) {
+ ImmReg = AMDGPU::HALF;
+ } else if (FloatValue == 1.0) {
+ ImmReg = AMDGPU::ONE;
+ } else {
+ ImmValue = Value.bitcastToAPInt().getZExtValue();
+ }
+ } else {
+ // XXX: 64-bit Immediates not supported yet
+ assert(N->getValueType(0) != MVT::i64);
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ if (C->getZExtValue() == 0) {
+ ImmReg = AMDGPU::ZERO;
+ } else if (C->getZExtValue() == 1) {
+ ImmReg = AMDGPU::ONE_INT;
+ } else {
+ ImmValue = C->getZExtValue();
+ }
+ }
+
+ for (SDNode::use_iterator Use = N->use_begin(), E = SDNode::use_end();
+ Use != E; ++Use) {
+ std::vector<SDValue> Ops;
+ for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
+ Ops.push_back(Use->getOperand(i));
+ }
+
+ if (!Use->isMachineOpcode()) {
+ if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+ // We can only use literal constants (e.g. AMDGPU::ZERO,
+ // AMDGPU::ONE, etc) in machine opcodes.
+ continue;
+ }
+ } else {
+ if (!TII->isALUInstr(Use->getMachineOpcode())) {
+ continue;
+ }
+
+ int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM);
+ assert(ImmIdx != -1);
+
+ // subtract one from ImmIdx, because the DST operand is usually index
+ // 0 for MachineInstrs, but we have no DST in the Ops vector.
+ ImmIdx--;
+
+ // Check that we aren't already using an immediate.
+ // XXX: It's possible for an instruction to have more than one
+ // immediate operand, but this is not supported yet.
+ if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
+ assert(C);
+
+ if (C->getZExtValue() != 0) {
+ // This instruction is already using an immediate.
+ continue;
+ }
+
+ // Set the immediate value
+ Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
+ }
+ }
+ // Set the immediate register
+ Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
+
+ CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
+ }
+ break;
+ }
+
}
return SelectCode(N);
}
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
index 20b1aa3..814e0a2 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -127,6 +127,15 @@ bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
}
}
+bool R600InstrInfo::isALUInstr(unsigned Opcode) const
+{
+ unsigned TargetFlags = get(Opcode).TSFlags;
+
+ return ((TargetFlags & R600_InstFlag::OP1) |
+ (TargetFlags & R600_InstFlag::OP2) |
+ (TargetFlags & R600_InstFlag::OP3));
+}
+
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const {
const InstrItineraryData *II = TM->getInstrItineraryData();
@@ -505,6 +514,11 @@ MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
R600Operands::Ops Op) const {
+ return getOperandIdx(MI.getOpcode(), Op);
+}
+
+int R600InstrInfo::getOperandIdx(unsigned Opcode,
+ R600Operands::Ops Op) const {
const static int OpTable[3][R600Operands::COUNT] = {
// W C S S S S S S S S
// R O D L S R R R S R R R S R R L P
@@ -515,7 +529,7 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
{0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17},
{0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14}
};
- unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
+ unsigned TargetFlags = get(Opcode).TSFlags;
unsigned OpTableIdx;
if (!HAS_NATIVE_OPERANDS(TargetFlags)) {
diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h
index cec1c3b..81e1828 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/lib/Target/AMDGPU/R600InstrInfo.h
@@ -50,6 +50,9 @@ namespace llvm {
bool isReductionOp(unsigned opcode) const;
bool isCubeOp(unsigned opcode) const;
+ /// isALUInstr - Returns true if this Opcode represents an ALU instruction.
+ bool isALUInstr(unsigned Opcode) const;
+
/// isVector - Vector instructions are instructions that must fill all
/// instruction slots within an instruction group.
bool isVector(const MachineInstr &MI) const;
@@ -130,6 +133,10 @@ namespace llvm {
/// if the Instruction does not contain the specified Op.
int getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const;
+ /// getOperandIdx - Get the index of Op for the given Opcode. Returns -1
+ /// if the Instruction does not contain the specified Op.
+ int getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const;
+
/// setImmOperand - Helper function for setting instruction flag values.
void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const;
diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll
index c6b6236..a94cfb5 100644
--- a/test/CodeGen/R600/fcmp-cnd.ll
+++ b/test/CodeGen/R600/fcmp-cnd.ll
@@ -1,6 +1,8 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;Not checking arguments 2 and 3 to CNDE, because they may change between
+;registers and literal.x depending on what the optimizer does.
+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
entry:
diff --git a/test/CodeGen/R600/fcmp-cnde-int-args.ll b/test/CodeGen/R600/fcmp-cnde-int-args.ll
index 92f3b5f..5c981ef 100644
--- a/test/CodeGen/R600/fcmp-cnde-int-args.ll
+++ b/test/CodeGen/R600/fcmp-cnde-int-args.ll
@@ -4,7 +4,7 @@
; chance to optimize the fcmp + select instructions to CNDE was missed
; due to the fact that the operands to fcmp and select had different types
-;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, 0.0, -1}}
define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
entry:
diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll
new file mode 100644
index 0000000..4c731b2
--- /dev/null
+++ b/test/CodeGen/R600/literals.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; Test using an integer literal constant.
+; Generated ASM should be:
+; ADD_INT REG literal.x, 5
+; or
+; ADD_INT literal.x REG, 5
+
+; CHECK: ADD_INT {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} 5
+define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = add i32 5, %in
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; Test using a float literal constant.
+; Generated ASM should be:
+; ADD REG literal.x, 5.0
+; or
+; ADD literal.x REG, 5.0
+
+; CHECK: ADD {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} {{[0-9]+}}(5.0
+define void @float_literal(float addrspace(1)* %out, float %in) {
+entry:
+ %0 = fadd float 5.0, %in
+ store float %0, float addrspace(1)* %out
+ ret void
+}
+
diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll b/test/CodeGen/R600/selectcc-icmp-select-float.ll
index f1f8ab1..f65a300 100644
--- a/test/CodeGen/R600/selectcc-icmp-select-float.ll
+++ b/test/CodeGen/R600/selectcc-icmp-select-float.ll
@@ -2,7 +2,7 @@
; Note additional optimizations may cause this SGT to be replaced with a
; CND* instruction.
-; CHECK: SGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: SGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}}
; Test a selectcc with i32 LHS/RHS and float True/False
define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/selectcc_cnde.ll b/test/CodeGen/R600/selectcc_cnde.ll
index e06a170..f0a0f51 100644
--- a/test/CodeGen/R600/selectcc_cnde.ll
+++ b/test/CodeGen/R600/selectcc_cnde.ll
@@ -1,7 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK-NOT: SETE
-;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1.0, literal.x, [-0-9]+\(2.0}}
define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
%1 = load float addrspace(1)* %in
%2 = fcmp oeq float %1, 0.0
diff --git a/test/CodeGen/R600/selectcc_cnde_int.ll b/test/CodeGen/R600/selectcc_cnde_int.ll
index 03d000f..b38078e 100644
--- a/test/CodeGen/R600/selectcc_cnde_int.ll
+++ b/test/CodeGen/R600/selectcc_cnde_int.ll
@@ -1,7 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK-NOT: SETE_INT
-;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1, literal.x, 2}}
define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%1 = load i32 addrspace(1)* %in
%2 = icmp eq i32 %1, 0
--
1.7.11.4
More information about the mesa-dev
mailing list