[Mesa-dev] [PATCH] radeon/llvm: keep frameindex after isel
Vincent Lejeune
vljn at ovi.com
Tue Nov 6 12:40:35 PST 2012
---
lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp | 11 ------
lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp | 23 ++++++------
lib/Target/AMDGPU/R600ISelLowering.cpp | 53 +++++++++++++++++++++++-----
lib/Target/AMDGPU/R600ISelLowering.h | 2 ++
lib/Target/AMDGPU/R600Instructions.td | 21 ++++++-----
5 files changed, 71 insertions(+), 39 deletions(-)
diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
index 9ea1212..a09796a 100644
--- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
@@ -161,17 +161,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
switch (Opc) {
default: break;
- case ISD::FrameIndex:
- {
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
- unsigned int FI = FIN->getIndex();
- EVT OpVT = N->getValueType(0);
- unsigned int NewOpc = AMDGPU::COPY;
- SDValue TFI = CurDAG->getRegister(AMDGPU::T0_X, MVT::i32);
- return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
- }
- }
- break;
}
return SelectCode(N);
}
diff --git a/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp b/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp
index 18db422..a2cb43a 100644
--- a/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp
+++ b/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp
@@ -70,15 +70,16 @@ bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::RegisterStore_i32:
case AMDGPU::RegisterStore_f32:
{
- int64_t Offset = (MI.getOperand(2).getImm() * 4) +
- MI.getOperand(3).getImm() +
- (IndirectRegOffset * 4);
- unsigned DstReg = AMDGPU::R600_TReg32RegClass.getRegister(Offset);
+ assert (MI.getOperand(1).isFI() && MI.getOperand(1).getIndex() == 0);
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
-
+ int64_t Offset = MI.getOperand(3).getImm() +
+ (IndirectRegOffset * 4);
MFI->IndirectChannels.set(MI.getOperand(3).getImm());
- if (MI.getOperand(1).getReg() == AMDGPU::ZERO) {
+ if (MI.getOperand(2).isImm()) {
+ // Direct Addressing
+ Offset += MI.getOperand(2).getImm() * 4;
+ unsigned DstReg = AMDGPU::R600_TReg32RegClass.getRegister(Offset);
MFI->ReservedRegs.push_back(DstReg);
TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, DstReg,
MI.getOperand(0).getReg());
@@ -86,7 +87,7 @@ bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *MOVA = TII->buildDefaultInstruction(*BB, I,
AMDGPU::MOVA_INT_eg,
AMDGPU::AR_X,
- MI.getOperand(1).getReg());
+ MI.getOperand(2).getReg());
TII->setImmOperand(MOVA, R600Operands::WRITE, 0);
unsigned OffsetReg = AMDGPU::R600_AddrRegClass.getRegister(Offset);
MachineInstrBuilder MIBuilder = TII->buildDefaultInstruction(*BB, I,
@@ -101,12 +102,14 @@ bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::RegisterLoad_i32:
case AMDGPU::RegisterLoad_f32:
{
+ assert (MI.getOperand(1).isFI() && MI.getOperand(1).getIndex() == 0);
unsigned Channel = MI.getOperand(3).getImm();
- unsigned Offset = (MI.getOperand(2).getImm() * 4) + Channel +
+ unsigned Offset = Channel +
(IndirectRegOffset * 4);
unsigned OffsetReg;
- if (MI.getOperand(1).getReg() == AMDGPU::ZERO) {
+ if (MI.getOperand(2).isImm()) {
+ Offset += MI.getOperand(2).getImm() * 4;
OffsetReg = AMDGPU::R600_TReg32RegClass.getRegister(Offset);
TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV,
MI.getOperand(0).getReg(),
@@ -116,7 +119,7 @@ bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *MOVA = TII->buildDefaultInstruction(*BB, I,
AMDGPU::MOVA_INT_eg,
AMDGPU::AR_X,
- MI.getOperand(1).getReg());
+ MI.getOperand(2).getReg());
TII->setImmOperand(MOVA, R600Operands::WRITE, 0);
OffsetReg = AMDGPU::R600_AddrRegClass.getRegister(Offset);
MachineInstrBuilder MIBuilder = TII->buildDefaultInstruction(*BB, I,
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 9bc2e03..02ab6d0 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -104,8 +104,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::v4f32, Custom);
- setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
-
setTargetDAGCombine(ISD::FP_ROUND);
setSchedulingPreference(Sched::VLIW);
@@ -360,7 +358,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::FPOW: return LowerFPOW(Op, DAG);
- case ISD::FrameIndex: return DAG.getConstant(0, MVT::i32);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
unsigned IntrinsicID =
@@ -804,6 +801,31 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
return Cond;
}
+bool R600TargetLowering::ReorderPointerArithm(SDValue Ptr, SelectionDAG &DAG,
+ SDValue &CleanPtr, SDValue &FrameIndex) const
+{
+ switch (Ptr.getOpcode()) {
+ default: return false;
+ case ISD::FrameIndex:
+ FrameIndex = Ptr;
+ CleanPtr = DAG.getConstant(0, MVT::i32);
+ return true;
+ case ISD::ADD:
+ case ISD::OR:
+ if (ReorderPointerArithm(Ptr.getOperand(0), DAG, CleanPtr, FrameIndex)) {
+ CleanPtr = DAG.getNode(Ptr.getOpcode(), Ptr.getDebugLoc(), MVT::i32,
+ CleanPtr, Ptr.getOperand(1));
+ return true;
+ } else if (ReorderPointerArithm(Ptr.getOperand(1), DAG, CleanPtr, FrameIndex)){
+ CleanPtr = DAG.getNode(Ptr.getOpcode(), Ptr.getDebugLoc(), MVT::i32,
+ Ptr.getOperand(0), CleanPtr);
+ return true;
+ } else {
+ return false;
+ }
+ }
+}
+
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
EVT VT = Op.getValueType();
@@ -817,22 +839,28 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
return SDValue();
}
+ SDValue FI;
+ SDValue PtrArithm;
+ ReorderPointerArithm(Ptr, DAG, PtrArithm, FI);
+ FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(FI.getNode());
+ SDValue TargetFI = DAG.getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+
if (VT.isVector()) {
EVT ElemVT = VT.getVectorElementType();
SDValue Loads[4];
- Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
+ Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), PtrArithm,
DAG.getConstant(4, MVT::i32));
for (unsigned i = 0; i < 4; ++i) {
Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
- Chain, Ptr,
+ Chain, TargetFI, Ptr,
DAG.getTargetConstant(i, MVT::i32), // Channel
Op.getOperand(2));
}
LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Loads, 4);
} else {
LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
- Chain, Ptr,
+ Chain, TargetFI, PtrArithm,
DAG.getTargetConstant(0, MVT::i32), // Channel
Op.getOperand(2));
}
@@ -859,12 +887,18 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const
return SDValue();
}
+ SDValue FI;
+ SDValue PtrArithm;
+ ReorderPointerArithm(Ptr, DAG, PtrArithm, FI);
+ FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(FI.getNode());
+ SDValue TargetFI = DAG.getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+
if (VT.isVector()) {
EVT ElemVT = VT.getVectorElementType();
SDValue Stores[4];
// XXX: I'm not sure how to explain this.
- Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
+ Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), PtrArithm,
DAG.getConstant(4, MVT::i32));
for (unsigned i = 0; i < 4; ++i) {
@@ -872,7 +906,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const
Value, DAG.getConstant(i, MVT::i32));
Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
- Chain, Elem, Ptr,
+ Chain, Elem, TargetFI, Ptr,
DAG.getTargetConstant(i, MVT::i32)); // Channel
MFI->IndirectChannels.set(i);
}
@@ -881,7 +915,8 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const
if (VT == MVT::i8) {
Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
}
- Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
+ Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain,
+ Value, TargetFI, PtrArithm,
DAG.getTargetConstant(0, MVT::i32)); // Channel
MFI->IndirectChannels.set(0);
}
diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h
index a2d7934..ded30d2 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/lib/Target/AMDGPU/R600ISelLowering.h
@@ -66,6 +66,8 @@ private:
SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
bool isZero(SDValue Op) const;
+ bool ReorderPointerArithm(SDValue Ptr, SelectionDAG &DAG,
+ SDValue &CleanPtr, SDValue &FrameIndex) const;
};
} // End namespace llvm;
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index d081824..13fe297 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -426,11 +426,11 @@ def isR600toCayman : Predicate<
//===----------------------------------------------------------------------===//
def REGISTER_LOAD : SDNode<"AMDGPUISD::REGISTER_LOAD",
- SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+ SDTypeProfile<1, 3, [SDTCisPtrTy<1>, SDTCisInt<2>, SDTCisInt<3>]>,
[SDNPHasChain, SDNPMayLoad]>;
def REGISTER_STORE : SDNode<"AMDGPUISD::REGISTER_STORE",
- SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+ SDTypeProfile<0, 4, [SDTCisPtrTy<1>, SDTCisInt<2>, SDTCisInt<3>]>,
[SDNPHasChain, SDNPMayStore]>;
def INTERP: SDNode<"AMDGPUISD::INTERP",
@@ -1386,17 +1386,20 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
let isPseudo = 1, isCodeGenOnly =1 in {
class RegisterLoad <ValueType vt> : InstR600 <0x0,
- (outs R600_Reg32:$dst), (ins FRAMEri:$addr, i32imm:$chan),
- "RegisterLoad $dst, $addr",
- [(set (vt R600_Reg32:$dst), (REGISTER_LOAD ADDRIndirect:$addr,
- (i32 timm:$chan)))],
+ (outs R600_Reg32:$dst),
+ (ins i32imm:$frameidx, R600_Reg32:$offset, i32imm:$chan),
+ "RegisterLoad $dst, $offset",
+ [(set (vt R600_Reg32:$dst), (REGISTER_LOAD (i32 tframeindex:$frameidx),
+ (i32 R600_Reg32:$offset), (i32 timm:$chan)))],
NullALU
>;
class RegisterStore <ValueType vt> : InstR600 <0x0,
- (outs), (ins R600_Reg32:$val, FRAMEri:$addr, i32imm:$chan),
- "RegisterStore_i32 $val, $addr",
- [(REGISTER_STORE (vt R600_Reg32:$val), ADDRIndirect:$addr, (i32 timm:$chan))],
+ (outs),
+ (ins R600_Reg32:$val, i32imm:$frameidx, R600_Reg32:$offset, i32imm:$chan),
+ "RegisterStore_i32 $val, $offset",
+ [(REGISTER_STORE (vt R600_Reg32:$val),
+ (i32 tframeindex:$frameidx), (i32 R600_Reg32:$offset), (i32 timm:$chan))],
NullALU
>;
--
1.7.11.7
More information about the mesa-dev mailing list