[Mesa-dev] [PATCH] radeon/llvm: keeps frameindex after isel
Tom Stellard
tom at stellard.net
Thu Nov 8 11:59:51 PST 2012
On Tue, Nov 06, 2012 at 09:40:35PM +0100, Vincent Lejeune wrote:
> ---
> lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp | 11 ------
> lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp | 23 ++++++------
> lib/Target/AMDGPU/R600ISelLowering.cpp | 53 +++++++++++++++++++++++-----
> lib/Target/AMDGPU/R600ISelLowering.h | 2 ++
> lib/Target/AMDGPU/R600Instructions.td | 21 ++++++-----
> 5 files changed, 71 insertions(+), 39 deletions(-)
>
> diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
> index 9ea1212..a09796a 100644
> --- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
> +++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
> @@ -161,17 +161,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
> }
> switch (Opc) {
> default: break;
> - case ISD::FrameIndex:
> - {
> - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
> - unsigned int FI = FIN->getIndex();
> - EVT OpVT = N->getValueType(0);
> - unsigned int NewOpc = AMDGPU::COPY;
> - SDValue TFI = CurDAG->getRegister(AMDGPU::T0_X, MVT::i32);
> - return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
> - }
> - }
> - break;
> }
> return SelectCode(N);
> }
> diff --git a/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp b/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp
> index 18db422..a2cb43a 100644
> --- a/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp
> +++ b/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp
> @@ -70,15 +70,16 @@ bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) {
> case AMDGPU::RegisterStore_i32:
> case AMDGPU::RegisterStore_f32:
> {
> - int64_t Offset = (MI.getOperand(2).getImm() * 4) +
> - MI.getOperand(3).getImm() +
> - (IndirectRegOffset * 4);
> - unsigned DstReg = AMDGPU::R600_TReg32RegClass.getRegister(Offset);
> + assert (MI.getOperand(1).isFI() && MI.getOperand(1).getIndex() == 0);
> R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
> -
> + int64_t Offset = MI.getOperand(3).getImm() +
> + (IndirectRegOffset * 4);
> MFI->IndirectChannels.set(MI.getOperand(3).getImm());
>
> - if (MI.getOperand(1).getReg() == AMDGPU::ZERO) {
> + if (MI.getOperand(2).isImm()) {
> + // Direct Addressing
> + Offset += MI.getOperand(2).getImm() * 4;
> + unsigned DstReg = AMDGPU::R600_TReg32RegClass.getRegister(Offset);
> MFI->ReservedRegs.push_back(DstReg);
> TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, DstReg,
> MI.getOperand(0).getReg());
> @@ -86,7 +87,7 @@ bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) {
> MachineInstr *MOVA = TII->buildDefaultInstruction(*BB, I,
> AMDGPU::MOVA_INT_eg,
> AMDGPU::AR_X,
> - MI.getOperand(1).getReg());
> + MI.getOperand(2).getReg());
> TII->setImmOperand(MOVA, R600Operands::WRITE, 0);
> unsigned OffsetReg = AMDGPU::R600_AddrRegClass.getRegister(Offset);
> MachineInstrBuilder MIBuilder = TII->buildDefaultInstruction(*BB, I,
> @@ -101,12 +102,14 @@ bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) {
> case AMDGPU::RegisterLoad_i32:
> case AMDGPU::RegisterLoad_f32:
> {
> + assert (MI.getOperand(1).isFI() && MI.getOperand(1).getIndex() == 0);
> unsigned Channel = MI.getOperand(3).getImm();
> - unsigned Offset = (MI.getOperand(2).getImm() * 4) + Channel +
> + unsigned Offset = Channel +
> (IndirectRegOffset * 4);
> unsigned OffsetReg;
>
> - if (MI.getOperand(1).getReg() == AMDGPU::ZERO) {
> + if (MI.getOperand(2).isImm()) {
> + Offset += MI.getOperand(2).getImm() * 4;
> OffsetReg = AMDGPU::R600_TReg32RegClass.getRegister(Offset);
> TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV,
> MI.getOperand(0).getReg(),
> @@ -116,7 +119,7 @@ bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) {
> MachineInstr *MOVA = TII->buildDefaultInstruction(*BB, I,
> AMDGPU::MOVA_INT_eg,
> AMDGPU::AR_X,
> - MI.getOperand(1).getReg());
> + MI.getOperand(2).getReg());
> TII->setImmOperand(MOVA, R600Operands::WRITE, 0);
> OffsetReg = AMDGPU::R600_AddrRegClass.getRegister(Offset);
> MachineInstrBuilder MIBuilder = TII->buildDefaultInstruction(*BB, I,
> diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
> index 9bc2e03..02ab6d0 100644
> --- a/lib/Target/AMDGPU/R600ISelLowering.cpp
> +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
> @@ -104,8 +104,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
> setOperationAction(ISD::STORE, MVT::i32, Custom);
> setOperationAction(ISD::STORE, MVT::v4f32, Custom);
>
> - setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
> -
> setTargetDAGCombine(ISD::FP_ROUND);
>
> setSchedulingPreference(Sched::VLIW);
> @@ -360,7 +358,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
> case ISD::LOAD: return LowerLOAD(Op, DAG);
> case ISD::STORE: return LowerSTORE(Op, DAG);
> case ISD::FPOW: return LowerFPOW(Op, DAG);
> - case ISD::FrameIndex: return DAG.getConstant(0, MVT::i32);
> case ISD::INTRINSIC_VOID: {
> SDValue Chain = Op.getOperand(0);
> unsigned IntrinsicID =
> @@ -804,6 +801,31 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
> return Cond;
> }
>
> +bool R600TargetLowering::ReorderPointerArithm(SDValue Ptr, SelectionDAG &DAG,
> + SDValue &CleanPtr, SDValue &FrameIndex) const
> +{
> + switch (Ptr.getOpcode()) {
> + default: return false;
> + case ISD::FrameIndex:
> + FrameIndex = Ptr;
> + CleanPtr = DAG.getConstant(0, MVT::i32);
> + return true;
> + case ISD::ADD:
> + case ISD::OR:
> + if (ReorderPointerArithm(Ptr.getOperand(0), DAG, CleanPtr, FrameIndex)) {
> + CleanPtr = DAG.getNode(Ptr.getOpcode(), Ptr.getDebugLoc(), MVT::i32,
> + CleanPtr, Ptr.getOperand(1));
> + return true;
> + } else if (ReorderPointerArithm(Ptr.getOperand(1), DAG, CleanPtr, FrameIndex)){
> + CleanPtr = DAG.getNode(Ptr.getOpcode(), Ptr.getDebugLoc(), MVT::i32,
> + Ptr.getOperand(0), CleanPtr);
> + return true;
> + } else {
> + return false;
> + }
> + }
> +}
> +
> SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
> {
> EVT VT = Op.getValueType();
> @@ -817,22 +839,28 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
> return SDValue();
> }
>
> + SDValue FI;
> + SDValue PtrArithm;
> + ReorderPointerArithm(Ptr, DAG, PtrArithm, FI);
> + FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(FI.getNode());
> + SDValue TargetFI = DAG.getTargetFrameIndex(FIN->getIndex(), MVT::i32);
> +
> if (VT.isVector()) {
> EVT ElemVT = VT.getVectorElementType();
> SDValue Loads[4];
> - Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
> + Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), PtrArithm,
> DAG.getConstant(4, MVT::i32));
>
> for (unsigned i = 0; i < 4; ++i) {
> Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
> - Chain, Ptr,
> + Chain, TargetFI, Ptr,
> DAG.getTargetConstant(i, MVT::i32), // Channel
> Op.getOperand(2));
Why are you doing the address and offset calculation here, rather than
using the ComplexPattern declared in R600Instructions.td ?
> }
> LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Loads, 4);
> } else {
> LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
> - Chain, Ptr,
> + Chain, TargetFI, PtrArithm,
> DAG.getTargetConstant(0, MVT::i32), // Channel
> Op.getOperand(2));
> }
> @@ -859,12 +887,18 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const
> return SDValue();
> }
>
> + SDValue FI;
> + SDValue PtrArithm;
> + ReorderPointerArithm(Ptr, DAG, PtrArithm, FI);
> + FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(FI.getNode());
> + SDValue TargetFI = DAG.getTargetFrameIndex(FIN->getIndex(), MVT::i32);
> +
> if (VT.isVector()) {
> EVT ElemVT = VT.getVectorElementType();
> SDValue Stores[4];
>
> // XXX: I'm not sure how to explain this.
> - Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
> + Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), PtrArithm,
> DAG.getConstant(4, MVT::i32));
>
> for (unsigned i = 0; i < 4; ++i) {
> @@ -872,7 +906,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const
> Value, DAG.getConstant(i, MVT::i32));
>
> Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
> - Chain, Elem, Ptr,
> + Chain, Elem, TargetFI, Ptr,
> DAG.getTargetConstant(i, MVT::i32)); // Channel
> MFI->IndirectChannels.set(i);
> }
> @@ -881,7 +915,8 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const
> if (VT == MVT::i8) {
> Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
> }
> - Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
> + Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain,
> + Value, TargetFI, PtrArithm,
> DAG.getTargetConstant(0, MVT::i32)); // Channel
> MFI->IndirectChannels.set(0);
> }
> diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h
> index a2d7934..ded30d2 100644
> --- a/lib/Target/AMDGPU/R600ISelLowering.h
> +++ b/lib/Target/AMDGPU/R600ISelLowering.h
> @@ -66,6 +66,8 @@ private:
> SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
>
> bool isZero(SDValue Op) const;
> + bool ReorderPointerArithm(SDValue Ptr, SelectionDAG &DAG,
> + SDValue &CleanPtr, SDValue &FrameIndex) const;
> };
>
> } // End namespace llvm;
> diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
> index d081824..13fe297 100644
> --- a/lib/Target/AMDGPU/R600Instructions.td
> +++ b/lib/Target/AMDGPU/R600Instructions.td
> @@ -426,11 +426,11 @@ def isR600toCayman : Predicate<
> //===----------------------------------------------------------------------===//
>
> def REGISTER_LOAD : SDNode<"AMDGPUISD::REGISTER_LOAD",
> - SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
> + SDTypeProfile<1, 3, [SDTCisPtrTy<1>, SDTCisInt<2>, SDTCisInt<3>]>,
> [SDNPHasChain, SDNPMayLoad]>;
>
> def REGISTER_STORE : SDNode<"AMDGPUISD::REGISTER_STORE",
> - SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
> + SDTypeProfile<0, 4, [SDTCisPtrTy<1>, SDTCisInt<2>, SDTCisInt<3>]>,
> [SDNPHasChain, SDNPMayStore]>;
>
> def INTERP: SDNode<"AMDGPUISD::INTERP",
> @@ -1386,17 +1386,20 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
> let isPseudo = 1, isCodeGenOnly =1 in {
>
> class RegisterLoad <ValueType vt> : InstR600 <0x0,
> - (outs R600_Reg32:$dst), (ins FRAMEri:$addr, i32imm:$chan),
> - "RegisterLoad $dst, $addr",
> - [(set (vt R600_Reg32:$dst), (REGISTER_LOAD ADDRIndirect:$addr,
> - (i32 timm:$chan)))],
> + (outs R600_Reg32:$dst),
> + (ins i32imm:$frameidx, R600_Reg32:$offset, i32imm:$chan),
> + "RegisterLoad $dst, $offset",
> + [(set (vt R600_Reg32:$dst), (REGISTER_LOAD (i32 tframeindex:$frameidx),
> + (i32 R600_Reg32:$offset), (i32 timm:$chan)))],
> NullALU
> >;
>
> class RegisterStore <ValueType vt> : InstR600 <0x0,
> - (outs), (ins R600_Reg32:$val, FRAMEri:$addr, i32imm:$chan),
> - "RegisterStore_i32 $val, $addr",
> - [(REGISTER_STORE (vt R600_Reg32:$val), ADDRIndirect:$addr, (i32 timm:$chan))],
> + (outs),
> + (ins R600_Reg32:$val, i32imm:$frameidx, R600_Reg32:$offset, i32imm:$chan),
> + "RegisterStore_i32 $val, $offset",
> + [(REGISTER_STORE (vt R600_Reg32:$val),
> + (i32 tframeindex:$frameidx), (i32 R600_Reg32:$offset), (i32 timm:$chan))],
> NullALU
> >;
>
> --
> 1.7.11.7
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list