[Mesa-dev] [PATCH] radeon/llvm: keeps frameindex after isel

Tom Stellard tom at stellard.net
Thu Nov 8 11:59:51 PST 2012


On Tue, Nov 06, 2012 at 09:40:35PM +0100, Vincent Lejeune wrote:
> ---
>  lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp      | 11 ------
>  lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp | 23 ++++++------
>  lib/Target/AMDGPU/R600ISelLowering.cpp       | 53 +++++++++++++++++++++++-----
>  lib/Target/AMDGPU/R600ISelLowering.h         |  2 ++
>  lib/Target/AMDGPU/R600Instructions.td        | 21 ++++++-----
>  5 files changed, 71 insertions(+), 39 deletions(-)
> 
> diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
> index 9ea1212..a09796a 100644
> --- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
> +++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
> @@ -161,17 +161,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
>    }
>    switch (Opc) {
>    default: break;
> -  case ISD::FrameIndex:
> -    {
> -      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
> -        unsigned int FI = FIN->getIndex();
> -        EVT OpVT = N->getValueType(0);
> -        unsigned int NewOpc = AMDGPU::COPY;
> -        SDValue TFI = CurDAG->getRegister(AMDGPU::T0_X, MVT::i32);
> -        return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
> -      }
> -    }
> -    break;
>    }
>    return SelectCode(N);
>  }
> diff --git a/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp b/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp
> index 18db422..a2cb43a 100644
> --- a/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp
> +++ b/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp
> @@ -70,15 +70,16 @@ bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) {
>        case AMDGPU::RegisterStore_i32:
>        case AMDGPU::RegisterStore_f32:
>          {
> -          int64_t Offset = (MI.getOperand(2).getImm() * 4) +
> -                           MI.getOperand(3).getImm() +
> -                           (IndirectRegOffset * 4);
> -          unsigned DstReg = AMDGPU::R600_TReg32RegClass.getRegister(Offset);
> +          assert (MI.getOperand(1).isFI() && MI.getOperand(1).getIndex() == 0);
>            R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
> -
> +          int64_t Offset = MI.getOperand(3).getImm() +
> +                           (IndirectRegOffset * 4);
>            MFI->IndirectChannels.set(MI.getOperand(3).getImm());
>  
> -          if (MI.getOperand(1).getReg() == AMDGPU::ZERO) {
> +          if (MI.getOperand(2).isImm()) {
> +            // Direct Addressing
> +            Offset += MI.getOperand(2).getImm() * 4;
> +            unsigned DstReg = AMDGPU::R600_TReg32RegClass.getRegister(Offset);
>              MFI->ReservedRegs.push_back(DstReg);
>              TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, DstReg,
>                            MI.getOperand(0).getReg());
> @@ -86,7 +87,7 @@ bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) {
>              MachineInstr *MOVA = TII->buildDefaultInstruction(*BB, I,
>                                                          AMDGPU::MOVA_INT_eg,
>                                                          AMDGPU::AR_X,
> -                                                        MI.getOperand(1).getReg());
> +                                                        MI.getOperand(2).getReg());
>              TII->setImmOperand(MOVA, R600Operands::WRITE, 0);
>              unsigned OffsetReg = AMDGPU::R600_AddrRegClass.getRegister(Offset);
>              MachineInstrBuilder MIBuilder = TII->buildDefaultInstruction(*BB, I,
> @@ -101,12 +102,14 @@ bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) {
>        case AMDGPU::RegisterLoad_i32:
>        case AMDGPU::RegisterLoad_f32:
>          {
> +          assert (MI.getOperand(1).isFI() && MI.getOperand(1).getIndex() == 0);
>            unsigned Channel = MI.getOperand(3).getImm();
> -          unsigned Offset = (MI.getOperand(2).getImm() * 4) + Channel +
> +          unsigned Offset = Channel +
>                              (IndirectRegOffset * 4);
>            unsigned OffsetReg;
>  
> -          if (MI.getOperand(1).getReg() == AMDGPU::ZERO) {
> +          if (MI.getOperand(2).isImm()) {
> +            Offset += MI.getOperand(2).getImm() * 4;
>              OffsetReg = AMDGPU::R600_TReg32RegClass.getRegister(Offset);
>              TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV,
>                                           MI.getOperand(0).getReg(),
> @@ -116,7 +119,7 @@ bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) {
>              MachineInstr *MOVA = TII->buildDefaultInstruction(*BB, I,
>                                                          AMDGPU::MOVA_INT_eg,
>                                                          AMDGPU::AR_X,
> -                                                        MI.getOperand(1).getReg());
> +                                                        MI.getOperand(2).getReg());
>              TII->setImmOperand(MOVA, R600Operands::WRITE, 0);
>              OffsetReg = AMDGPU::R600_AddrRegClass.getRegister(Offset);
>              MachineInstrBuilder MIBuilder = TII->buildDefaultInstruction(*BB, I,
> diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
> index 9bc2e03..02ab6d0 100644
> --- a/lib/Target/AMDGPU/R600ISelLowering.cpp
> +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
> @@ -104,8 +104,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
>    setOperationAction(ISD::STORE, MVT::i32, Custom);
>    setOperationAction(ISD::STORE, MVT::v4f32, Custom);
>  
> -  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
> -
>    setTargetDAGCombine(ISD::FP_ROUND);
>  
>    setSchedulingPreference(Sched::VLIW);
> @@ -360,7 +358,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
>    case ISD::LOAD: return LowerLOAD(Op, DAG);
>    case ISD::STORE: return LowerSTORE(Op, DAG);
>    case ISD::FPOW: return LowerFPOW(Op, DAG);
> -  case ISD::FrameIndex: return DAG.getConstant(0, MVT::i32);
>    case ISD::INTRINSIC_VOID: {
>      SDValue Chain = Op.getOperand(0);
>      unsigned IntrinsicID =
> @@ -804,6 +801,31 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
>    return Cond;
>  }
>  
> +bool R600TargetLowering::ReorderPointerArithm(SDValue Ptr, SelectionDAG &DAG,
> +    SDValue &CleanPtr, SDValue &FrameIndex) const
> +{
> +  switch (Ptr.getOpcode()) {
> +  default: return false;
> +  case ISD::FrameIndex:
> +    FrameIndex = Ptr;
> +    CleanPtr = DAG.getConstant(0, MVT::i32);
> +    return true;
> +  case ISD::ADD:
> +  case ISD::OR:
> +    if (ReorderPointerArithm(Ptr.getOperand(0), DAG, CleanPtr, FrameIndex)) {
> +      CleanPtr = DAG.getNode(Ptr.getOpcode(), Ptr.getDebugLoc(), MVT::i32,
> +          CleanPtr, Ptr.getOperand(1));
> +      return true;
> +    } else if (ReorderPointerArithm(Ptr.getOperand(1), DAG, CleanPtr, FrameIndex)){
> +      CleanPtr = DAG.getNode(Ptr.getOpcode(), Ptr.getDebugLoc(), MVT::i32,
> +          Ptr.getOperand(0), CleanPtr);
> +      return true;
> +    } else {
> +      return false;
> +    }
> +  }
> +}
> +
>  SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
>  {
>    EVT VT = Op.getValueType();
> @@ -817,22 +839,28 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
>      return SDValue();
>    }
>  
> +  SDValue FI;
> +  SDValue PtrArithm;
> +  ReorderPointerArithm(Ptr, DAG, PtrArithm, FI);
> +  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(FI.getNode());
> +  SDValue TargetFI = DAG.getTargetFrameIndex(FIN->getIndex(), MVT::i32);
> +
>    if (VT.isVector()) {
>      EVT ElemVT = VT.getVectorElementType();
>      SDValue Loads[4];
> -    Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
> +    Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), PtrArithm,
>                        DAG.getConstant(4, MVT::i32));
>  
>      for (unsigned i = 0; i < 4; ++i) {
>        Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
> -                             Chain, Ptr,
> +                             Chain, TargetFI, Ptr,
>                               DAG.getTargetConstant(i, MVT::i32), // Channel
>                               Op.getOperand(2));

Why are you doing the address and offset calculation here, rather than
using the ComplexPattern declared in R600Instructions.td?

>      }
>      LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Loads, 4);
>    } else {
>      LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
> -                              Chain, Ptr,
> +                              Chain, TargetFI, PtrArithm,
>                                DAG.getTargetConstant(0, MVT::i32), // Channel
>                                Op.getOperand(2));
>    }
> @@ -859,12 +887,18 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const
>      return SDValue();
>    }
>  
> +  SDValue FI;
> +  SDValue PtrArithm;
> +  ReorderPointerArithm(Ptr, DAG, PtrArithm, FI);
> +  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(FI.getNode());
> +  SDValue TargetFI = DAG.getTargetFrameIndex(FIN->getIndex(), MVT::i32);
> +
>    if (VT.isVector()) {
>      EVT ElemVT = VT.getVectorElementType();
>      SDValue Stores[4];
>  
>      // XXX: I'm not sure how to explain this.
> -    Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
> +    Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), PtrArithm,
>                        DAG.getConstant(4, MVT::i32));
>  
>      for (unsigned i = 0; i < 4; ++i) {
> @@ -872,7 +906,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const
>                                   Value, DAG.getConstant(i, MVT::i32));
>  
>        Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
> -                              Chain, Elem, Ptr,
> +                              Chain, Elem, TargetFI, Ptr,
>                                DAG.getTargetConstant(i, MVT::i32)); // Channel
>        MFI->IndirectChannels.set(i);
>      }
> @@ -881,7 +915,8 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const
>      if (VT == MVT::i8) {
>        Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
>      }
> -    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
> +    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain,
> +        Value, TargetFI, PtrArithm,
>      DAG.getTargetConstant(0, MVT::i32)); // Channel 
>      MFI->IndirectChannels.set(0);
>    }
> diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h
> index a2d7934..ded30d2 100644
> --- a/lib/Target/AMDGPU/R600ISelLowering.h
> +++ b/lib/Target/AMDGPU/R600ISelLowering.h
> @@ -66,6 +66,8 @@ private:
>    SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
>    
>    bool isZero(SDValue Op) const;
> +  bool ReorderPointerArithm(SDValue Ptr, SelectionDAG &DAG,
> +      SDValue &CleanPtr, SDValue &FrameIndex) const;
>  };
>  
>  } // End namespace llvm;
> diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
> index d081824..13fe297 100644
> --- a/lib/Target/AMDGPU/R600Instructions.td
> +++ b/lib/Target/AMDGPU/R600Instructions.td
> @@ -426,11 +426,11 @@ def isR600toCayman : Predicate<
>  //===----------------------------------------------------------------------===//
>  
>  def REGISTER_LOAD : SDNode<"AMDGPUISD::REGISTER_LOAD",
> -                          SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
> +                          SDTypeProfile<1, 3, [SDTCisPtrTy<1>, SDTCisInt<2>, SDTCisInt<3>]>,
>                            [SDNPHasChain, SDNPMayLoad]>;
>  
>  def REGISTER_STORE : SDNode<"AMDGPUISD::REGISTER_STORE",
> -                           SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
> +                           SDTypeProfile<0, 4, [SDTCisPtrTy<1>, SDTCisInt<2>, SDTCisInt<3>]>,
>                             [SDNPHasChain, SDNPMayStore]>;
>  
>  def INTERP: SDNode<"AMDGPUISD::INTERP",
> @@ -1386,17 +1386,20 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
>  let isPseudo = 1, isCodeGenOnly =1 in {
>  
>  class RegisterLoad <ValueType vt> : InstR600 <0x0,
> -  (outs R600_Reg32:$dst), (ins FRAMEri:$addr, i32imm:$chan),
> -  "RegisterLoad $dst, $addr",
> -  [(set (vt R600_Reg32:$dst), (REGISTER_LOAD ADDRIndirect:$addr,
> -                               (i32 timm:$chan)))],
> +  (outs R600_Reg32:$dst), 
> +  (ins i32imm:$frameidx, R600_Reg32:$offset, i32imm:$chan),
> +  "RegisterLoad $dst, $offset",
> +  [(set (vt R600_Reg32:$dst), (REGISTER_LOAD (i32 tframeindex:$frameidx),
> +    (i32 R600_Reg32:$offset), (i32 timm:$chan)))],
>    NullALU
>  >;
>  
>  class RegisterStore <ValueType vt> : InstR600 <0x0,
> -  (outs), (ins R600_Reg32:$val, FRAMEri:$addr, i32imm:$chan),
> -  "RegisterStore_i32 $val, $addr",
> -  [(REGISTER_STORE (vt R600_Reg32:$val), ADDRIndirect:$addr, (i32 timm:$chan))],
> +  (outs),
> +  (ins R600_Reg32:$val, i32imm:$frameidx, R600_Reg32:$offset, i32imm:$chan),
> +  "RegisterStore_i32 $val, $offset",
> +  [(REGISTER_STORE (vt R600_Reg32:$val), 
> +    (i32 tframeindex:$frameidx), (i32 R600_Reg32:$offset), (i32 timm:$chan))],
>    NullALU
>  >;
>  
> -- 
> 1.7.11.7
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list