[Mesa-dev] [PATCH 4/4] R600: Fold immediates into ALU instructions when possible
Tom Stellard
tom at stellard.net
Fri Nov 9 12:47:37 PST 2012
From: Tom Stellard <thomas.stellard at amd.com>
---
lib/Target/AMDGPU/R600ISelLowering.cpp | 65 ++++++++++++++++++++++---
lib/Target/AMDGPU/R600InstrInfo.cpp | 40 +++++++++++++++
lib/Target/AMDGPU/R600InstrInfo.h | 12 +++++
test/CodeGen/R600/fcmp-cnd.ll | 4 +-
test/CodeGen/R600/fcmp-cnde-int-args.ll | 2 +-
test/CodeGen/R600/selectcc-icmp-select-float.ll | 2 +-
test/CodeGen/R600/selectcc_cnde.ll | 2 +-
test/CodeGen/R600/selectcc_cnde_int.ll | 2 +-
8 files changed, 117 insertions(+), 12 deletions(-)
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 75a2a90..dc2247a 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -131,15 +131,66 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
}
case AMDGPU::MOV_IMM_F32:
- TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
- MI->getOperand(1).getFPImm()->getValueAPF()
- .bitcastToAPInt().getZExtValue());
- break;
case AMDGPU::MOV_IMM_I32:
- TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
- MI->getOperand(1).getImm());
- break;
+ {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool CanReplace = true;
+ int64_t Imm;
+ unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
+ bool IsInlineConstant = false;
+
+ if (MI->getOpcode() == AMDGPU::MOV_IMM_F32) {
+ APFloat Value = MI->getOperand(1).getFPImm()->getValueAPF();
+ float FloatValue = Value.convertToFloat();
+ if (FloatValue == 0.0f) {
+ ImmReg = AMDGPU::ZERO;
+ IsInlineConstant = true;
+ } else if (FloatValue == 1.0f) {
+ ImmReg = AMDGPU::ONE;
+ IsInlineConstant = true;
+ }
+ Imm = Value.bitcastToAPInt().getZExtValue();
+ } else {
+ Imm = MI->getOperand(1).getImm();
+ switch (Imm) {
+ case 0:
+ ImmReg = AMDGPU::ZERO;
+ IsInlineConstant = true;
+ break;
+ case 1:
+ ImmReg = AMDGPU::ONE_INT;
+ IsInlineConstant = true;
+ break;
+ }
+ }
+
+ // Check all the uses of DstReg to make sure it is safe to fold the
+ // immediate.
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(DstReg);
+ UI != MachineRegisterInfo::use_end(); ++UI) {
+ if (!TII->canUseImmediate(UI.getOperand()) &&
+ !(IsInlineConstant && TII->isALUInstr((*UI).getOpcode()))) {
+ CanReplace = false;
+ break;
+ }
+ }
+
+ if (!CanReplace) {
+ TII->buildMovImm(*BB, I, DstReg, Imm);
+ } else {
+ if (!IsInlineConstant) {
+ // Add the immediate to the use instructions
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(DstReg);
+ UI != MachineRegisterInfo::use_end(); ++UI) {
+ TII->setImmOperand(&*UI, R600Operands::IMM, Imm);
+ }
+ }
+ // Update the use's register
+ MRI.replaceRegWith(DstReg, ImmReg);
+ }
+ break;
+ }
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
index e437313..2ca5e45 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -138,6 +138,15 @@ bool R600InstrInfo::isCubeOp(unsigned Opcode) const
}
}
+bool R600InstrInfo::isALUInstr(unsigned Opcode) const
+{
+ unsigned TargetFlags = get(Opcode).TSFlags;
+
+ return ((TargetFlags & R600_InstFlag::OP1) |
+ (TargetFlags & R600_InstFlag::OP2) |
+ (TargetFlags & R600_InstFlag::OP3));
+}
+
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const
{
@@ -582,6 +591,37 @@ void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
MI->getOperand(Idx).setImm(Imm);
}
+bool R600InstrInfo::operandUsesImmediate(const MachineInstr &MI,
+ R600Operands::Ops Op) const
+{
+ int OpIndex = getOperandIdx(MI, Op);
+ return OpIndex != -1 &&
+ MI.getOperand(OpIndex).getReg() == AMDGPU::ALU_LITERAL_X;
+}
+
+bool R600InstrInfo::canUseImmediate(const MachineOperand &MO) const
+{
+ const MachineInstr *MI = MO.getParent();
+
+ if (!isALUInstr(MI->getOpcode())) {
+ return false;
+ }
+
+ // XXX: Handle cases when an instruction can reference an immediate from
+ // multiple operands:
+ //
+ // 1. Each operand uses the same immediate value
+ // 2. There is room in the instruction group (MIBundle) for another immediate.
+ // We need to start using bundles first.
+ if (operandUsesImmediate(*MI, R600Operands::SRC0) ||
+ operandUsesImmediate(*MI, R600Operands::SRC1) ||
+ operandUsesImmediate(*MI, R600Operands::SRC2)) {
+ return false;
+ }
+
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h
index cec1c3b..7c62a53 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/lib/Target/AMDGPU/R600InstrInfo.h
@@ -50,6 +50,9 @@ namespace llvm {
bool isReductionOp(unsigned opcode) const;
bool isCubeOp(unsigned opcode) const;
+ /// isALUInstr - Returns true if this Opcode represents an ALU instruction.
+ bool isALUInstr(unsigned Opcode) const;
+
/// isVector - Vector instructions are instructions that must fill all
/// instruction slots within an instruction group.
bool isVector(const MachineInstr &MI) const;
@@ -133,6 +136,15 @@ namespace llvm {
/// setImmOperand - Helper function for setting instruction flag values.
void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const;
+ /// operandUsesImmediate - Return true if it is legal for this instruction
+ /// to reference an immediate from one of its operands and the given operand
+ /// is using an immediate.
+ bool operandUsesImmediate(const MachineInstr &MI, R600Operands::Ops Op) const;
+
+ /// canUseImmediate - Return true if this operand can be replaced with an
+ /// immediate.
+ bool canUseImmediate(const MachineOperand &MO) const;
+
///hasFlagOperand - Returns true if this instruction has an operand for
/// storing target flags.
bool hasFlagOperand(const MachineInstr &MI) const;
diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll
index c6b6236..a94cfb5 100644
--- a/test/CodeGen/R600/fcmp-cnd.ll
+++ b/test/CodeGen/R600/fcmp-cnd.ll
@@ -1,6 +1,8 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;Not checking arguments 2 and 3 to CNDE, because they may change between
+;registers and literal.x depending on what the optimizer does.
+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
entry:
diff --git a/test/CodeGen/R600/fcmp-cnde-int-args.ll b/test/CodeGen/R600/fcmp-cnde-int-args.ll
index 92f3b5f..5c981ef 100644
--- a/test/CodeGen/R600/fcmp-cnde-int-args.ll
+++ b/test/CodeGen/R600/fcmp-cnde-int-args.ll
@@ -4,7 +4,7 @@
; chance to optimize the fcmp + select instructions to CNDE was missed
; due to the fact that the operands to fcmp and select had different types
-;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, 0.0, -1}}
define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
entry:
diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll b/test/CodeGen/R600/selectcc-icmp-select-float.ll
index f1f8ab1..f65a300 100644
--- a/test/CodeGen/R600/selectcc-icmp-select-float.ll
+++ b/test/CodeGen/R600/selectcc-icmp-select-float.ll
@@ -2,7 +2,7 @@
; Note additional optimizations may cause this SGT to be replaced with a
; CND* instruction.
-; CHECK: SGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: SGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}}
; Test a selectcc with i32 LHS/RHS and float True/False
define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/selectcc_cnde.ll b/test/CodeGen/R600/selectcc_cnde.ll
index e06a170..f0a0f51 100644
--- a/test/CodeGen/R600/selectcc_cnde.ll
+++ b/test/CodeGen/R600/selectcc_cnde.ll
@@ -1,7 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK-NOT: SETE
-;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1.0, literal.x, [-0-9]+\(2.0}}
define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
%1 = load float addrspace(1)* %in
%2 = fcmp oeq float %1, 0.0
diff --git a/test/CodeGen/R600/selectcc_cnde_int.ll b/test/CodeGen/R600/selectcc_cnde_int.ll
index 03d000f..b38078e 100644
--- a/test/CodeGen/R600/selectcc_cnde_int.ll
+++ b/test/CodeGen/R600/selectcc_cnde_int.ll
@@ -1,7 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK-NOT: SETE_INT
-;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1, literal.x, 2}}
define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%1 = load i32 addrspace(1)* %in
%2 = icmp eq i32 %1, 0
--
1.7.11.4
More information about the mesa-dev
mailing list