[Mesa-dev] [PATCH] radeon/llvm: keeps frameindex after isel

Vincent Lejeune vljn at ovi.com
Thu Nov 8 13:12:49 PST 2012





----- Mail original -----
> De : Tom Stellard <tom at stellard.net>
> À : Vincent Lejeune <vljn at ovi.com>
> Cc : mesa-dev at lists.freedesktop.org
> Envoyé le : Jeudi 8 novembre 2012 20h59
> Objet : Re: [Mesa-dev] [PATCH] radeon/llvm: keeps frameindex after isel
> 
> On Tue, Nov 06, 2012 at 09:40:35PM +0100, Vincent Lejeune wrote:
>>  ---
>>   lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp      | 11 ------
>>   lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp | 23 ++++++------
>>   lib/Target/AMDGPU/R600ISelLowering.cpp       | 53 
> +++++++++++++++++++++++-----
>>   lib/Target/AMDGPU/R600ISelLowering.h         |  2 ++
>>   lib/Target/AMDGPU/R600Instructions.td        | 21 ++++++-----
>>   5 files changed, 71 insertions(+), 39 deletions(-)
>> 
>>  diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp 
> b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
>>  index 9ea1212..a09796a 100644
>>  --- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
>>  +++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
>>  @@ -161,17 +161,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
>>     }
>>     switch (Opc) {
>>     default: break;
>>  -  case ISD::FrameIndex:
>>  -    {
>>  -      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
>>  -        unsigned int FI = FIN->getIndex();
>>  -        EVT OpVT = N->getValueType(0);
>>  -        unsigned int NewOpc = AMDGPU::COPY;
>>  -        SDValue TFI = CurDAG->getRegister(AMDGPU::T0_X, MVT::i32);
>>  -        return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
>>  -      }
>>  -    }
>>  -    break;
>>     }
>>     return SelectCode(N);
>>   }
>>  diff --git a/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp 
> b/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp
>>  index 18db422..a2cb43a 100644
>>  --- a/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp
>>  +++ b/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp
>>  @@ -70,15 +70,16 @@ bool 
> R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) {
>>         case AMDGPU::RegisterStore_i32:
>>         case AMDGPU::RegisterStore_f32:
>>           {
>>  -          int64_t Offset = (MI.getOperand(2).getImm() * 4) +
>>  -                           MI.getOperand(3).getImm() +
>>  -                           (IndirectRegOffset * 4);
>>  -          unsigned DstReg = 
> AMDGPU::R600_TReg32RegClass.getRegister(Offset);
>>  +          assert (MI.getOperand(1).isFI() && 
> MI.getOperand(1).getIndex() == 0);
>>             R600MachineFunctionInfo *MFI = 
> MF.getInfo<R600MachineFunctionInfo>();
>>  -
>>  +          int64_t Offset = MI.getOperand(3).getImm() +
>>  +                           (IndirectRegOffset * 4);
>>             MFI->IndirectChannels.set(MI.getOperand(3).getImm());
>>   
>>  -          if (MI.getOperand(1).getReg() == AMDGPU::ZERO) {
>>  +          if (MI.getOperand(2).isImm()) {
>>  +            // Direct Addressing
>>  +            Offset += MI.getOperand(2).getImm() * 4;
>>  +            unsigned DstReg = 
> AMDGPU::R600_TReg32RegClass.getRegister(Offset);
>>               MFI->ReservedRegs.push_back(DstReg);
>>               TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, DstReg,
>>                             MI.getOperand(0).getReg());
>>  @@ -86,7 +87,7 @@ bool 
> R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) {
>>               MachineInstr *MOVA = TII->buildDefaultInstruction(*BB, I,
>>                                                           
> AMDGPU::MOVA_INT_eg,
>>                                                           AMDGPU::AR_X,
>>  -                                                        
> MI.getOperand(1).getReg());
>>  +                                                        
> MI.getOperand(2).getReg());
>>               TII->setImmOperand(MOVA, R600Operands::WRITE, 0);
>>               unsigned OffsetReg = 
> AMDGPU::R600_AddrRegClass.getRegister(Offset);
>>               MachineInstrBuilder MIBuilder = 
> TII->buildDefaultInstruction(*BB, I,
>>  @@ -101,12 +102,14 @@ bool 
> R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) {
>>         case AMDGPU::RegisterLoad_i32:
>>         case AMDGPU::RegisterLoad_f32:
>>           {
>>  +          assert (MI.getOperand(1).isFI() && 
> MI.getOperand(1).getIndex() == 0);
>>             unsigned Channel = MI.getOperand(3).getImm();
>>  -          unsigned Offset = (MI.getOperand(2).getImm() * 4) + Channel +
>>  +          unsigned Offset = Channel +
>>                               (IndirectRegOffset * 4);
>>             unsigned OffsetReg;
>>   
>>  -          if (MI.getOperand(1).getReg() == AMDGPU::ZERO) {
>>  +          if (MI.getOperand(2).isImm()) {
>>  +            Offset += MI.getOperand(2).getImm() * 4;
>>               OffsetReg = AMDGPU::R600_TReg32RegClass.getRegister(Offset);
>>               TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV,
>>                                            MI.getOperand(0).getReg(),
>>  @@ -116,7 +119,7 @@ bool 
> R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) {
>>               MachineInstr *MOVA = TII->buildDefaultInstruction(*BB, I,
>>                                                           
> AMDGPU::MOVA_INT_eg,
>>                                                           AMDGPU::AR_X,
>>  -                                                        
> MI.getOperand(1).getReg());
>>  +                                                        
> MI.getOperand(2).getReg());
>>               TII->setImmOperand(MOVA, R600Operands::WRITE, 0);
>>               OffsetReg = AMDGPU::R600_AddrRegClass.getRegister(Offset);
>>               MachineInstrBuilder MIBuilder = 
> TII->buildDefaultInstruction(*BB, I,
>>  diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp 
> b/lib/Target/AMDGPU/R600ISelLowering.cpp
>>  index 9bc2e03..02ab6d0 100644
>>  --- a/lib/Target/AMDGPU/R600ISelLowering.cpp
>>  +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
>>  @@ -104,8 +104,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine 
> &TM) :
>>     setOperationAction(ISD::STORE, MVT::i32, Custom);
>>     setOperationAction(ISD::STORE, MVT::v4f32, Custom);
>>   
>>  -  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
>>  -
>>     setTargetDAGCombine(ISD::FP_ROUND);
>>   
>>     setSchedulingPreference(Sched::VLIW);
>>  @@ -360,7 +358,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, 
> SelectionDAG &DAG) const
>>     case ISD::LOAD: return LowerLOAD(Op, DAG);
>>     case ISD::STORE: return LowerSTORE(Op, DAG);
>>     case ISD::FPOW: return LowerFPOW(Op, DAG);
>>  -  case ISD::FrameIndex: return DAG.getConstant(0, MVT::i32);
>>     case ISD::INTRINSIC_VOID: {
>>       SDValue Chain = Op.getOperand(0);
>>       unsigned IntrinsicID =
>>  @@ -804,6 +801,31 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, 
> SelectionDAG &DAG) const
>>     return Cond;
>>   }
>>   
>>  +bool R600TargetLowering::ReorderPointerArithm(SDValue Ptr, SelectionDAG 
> &DAG,
>>  +    SDValue &CleanPtr, SDValue &FrameIndex) const
>>  +{
>>  +  switch (Ptr.getOpcode()) {
>>  +  default: return false;
>>  +  case ISD::FrameIndex:
>>  +    FrameIndex = Ptr;
>>  +    CleanPtr = DAG.getConstant(0, MVT::i32);
>>  +    return true;
>>  +  case ISD::ADD:
>>  +  case ISD::OR:
>>  +    if (ReorderPointerArithm(Ptr.getOperand(0), DAG, CleanPtr, 
> FrameIndex)) {
>>  +      CleanPtr = DAG.getNode(Ptr.getOpcode(), Ptr.getDebugLoc(), MVT::i32,
>>  +          CleanPtr, Ptr.getOperand(1));
>>  +      return true;
>>  +    } else if (ReorderPointerArithm(Ptr.getOperand(1), DAG, CleanPtr, 
> FrameIndex)){
>>  +      CleanPtr = DAG.getNode(Ptr.getOpcode(), Ptr.getDebugLoc(), MVT::i32,
>>  +          Ptr.getOperand(0), CleanPtr);
>>  +      return true;
>>  +    } else {
>>  +      return false;
>>  +    }
>>  +  }
>>  +}
>>  +
>>   SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) 
> const
>>   {
>>     EVT VT = Op.getValueType();
>>  @@ -817,22 +839,28 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, 
> SelectionDAG &DAG) const
>>       return SDValue();
>>     }
>>   
>>  +  SDValue FI;
>>  +  SDValue PtrArithm;
>>  +  ReorderPointerArithm(Ptr, DAG, PtrArithm, FI);
>>  +  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(FI.getNode());
>>  +  SDValue TargetFI = DAG.getTargetFrameIndex(FIN->getIndex(), 
> MVT::i32);
>>  +
>>     if (VT.isVector()) {
>>       EVT ElemVT = VT.getVectorElementType();
>>       SDValue Loads[4];
>>  -    Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
>>  +    Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), PtrArithm,
>>                         DAG.getConstant(4, MVT::i32));
>>   
>>       for (unsigned i = 0; i < 4; ++i) {
>>         Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
>>  -                             Chain, Ptr,
>>  +                             Chain, TargetFI, Ptr,
>>                                DAG.getTargetConstant(i, MVT::i32), // 
> Channel
>>                                Op.getOperand(2));
> 
> Why are you doing the address and offset calculation here, rather than
> using the ComplexPattern declared in R600Instructions.td ?

LLVM expects frameindex to behave like memory addresses ; it is legal to find something like :
i32 add ( i32 add (i32 add (i32 frameindex)  constant 4) constant 4) constant 4)

The Addr ComplexPattern expects Frameindex to be lhs or rhs of the topmost expression, which is not always the case ;
and it does not like to modify Selection Graph when merging instructions (I was generating a constant 0 Node to replace frameindex but
the ConstantNode 0 was already merged in the graph). Besides, LLVM can combine nodes after custom lowering them and automaticaly
 transforms ((FrameIndex + 16) << 4) to 1 after replacing Frameindex by 0 in the expression tree.


> 
>>       }
>>       LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Loads, 4);
>>     } else {
>>       LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
>>  -                              Chain, Ptr,
>>  +                              Chain, TargetFI, PtrArithm,
>>                                 DAG.getTargetConstant(0, MVT::i32), // 
> Channel
>>                                 Op.getOperand(2));
>>     }
>>  @@ -859,12 +887,18 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, 
> SelectionDAG &DAG) const
>>       return SDValue();
>>     }
>>   
>>  +  SDValue FI;
>>  +  SDValue PtrArithm;
>>  +  ReorderPointerArithm(Ptr, DAG, PtrArithm, FI);
>>  +  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(FI.getNode());
>>  +  SDValue TargetFI = DAG.getTargetFrameIndex(FIN->getIndex(), 
> MVT::i32);
>>  +
>>     if (VT.isVector()) {
>>       EVT ElemVT = VT.getVectorElementType();
>>       SDValue Stores[4];
>>   
>>       // XXX: I'm not sure how to explain this.
>>  -    Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
>>  +    Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), PtrArithm,
>>                         DAG.getConstant(4, MVT::i32));
>>   
>>       for (unsigned i = 0; i < 4; ++i) {
>>  @@ -872,7 +906,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, 
> SelectionDAG &DAG) const
>>                                    Value, DAG.getConstant(i, MVT::i32));
>>   
>>         Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
>>  -                              Chain, Elem, Ptr,
>>  +                              Chain, Elem, TargetFI, Ptr,
>>                                 DAG.getTargetConstant(i, MVT::i32)); // 
> Channel
>>         MFI->IndirectChannels.set(i);
>>       }
>>  @@ -881,7 +915,8 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, 
> SelectionDAG &DAG) const
>>       if (VT == MVT::i8) {
>>         Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
>>       }
>>  -    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, 
> Value, Ptr,
>>  +    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain,
>>  +        Value, TargetFI, PtrArithm,
>>       DAG.getTargetConstant(0, MVT::i32)); // Channel 
>>       MFI->IndirectChannels.set(0);
>>     }
>>  diff --git a/lib/Target/AMDGPU/R600ISelLowering.h 
> b/lib/Target/AMDGPU/R600ISelLowering.h
>>  index a2d7934..ded30d2 100644
>>  --- a/lib/Target/AMDGPU/R600ISelLowering.h
>>  +++ b/lib/Target/AMDGPU/R600ISelLowering.h
>>  @@ -66,6 +66,8 @@ private:
>>     SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
>>     
>>     bool isZero(SDValue Op) const;
>>  +  bool ReorderPointerArithm(SDValue Ptr, SelectionDAG &DAG,
>>  +      SDValue &CleanPtr, SDValue &FrameIndex) const;
>>   };
>>   
>>   } // End namespace llvm;
>>  diff --git a/lib/Target/AMDGPU/R600Instructions.td 
> b/lib/Target/AMDGPU/R600Instructions.td
>>  index d081824..13fe297 100644
>>  --- a/lib/Target/AMDGPU/R600Instructions.td
>>  +++ b/lib/Target/AMDGPU/R600Instructions.td
>>  @@ -426,11 +426,11 @@ def isR600toCayman : Predicate<
>>   
> //===----------------------------------------------------------------------===//
>>   
>>   def REGISTER_LOAD : SDNode<"AMDGPUISD::REGISTER_LOAD",
>>  -                          SDTypeProfile<1, 2, [SDTCisPtrTy<1>, 
> SDTCisInt<2>]>,
>>  +                          SDTypeProfile<1, 3, [SDTCisPtrTy<1>, 
> SDTCisInt<2>, SDTCisInt<3>]>,
>>                             [SDNPHasChain, SDNPMayLoad]>;
>>   
>>   def REGISTER_STORE : SDNode<"AMDGPUISD::REGISTER_STORE",
>>  -                           SDTypeProfile<0, 3, [SDTCisPtrTy<1>, 
> SDTCisInt<2>]>,
>>  +                           SDTypeProfile<0, 4, [SDTCisPtrTy<1>, 
> SDTCisInt<2>, SDTCisInt<3>]>,
>>                              [SDNPHasChain, SDNPMayStore]>;
>>   
>>   def INTERP: SDNode<"AMDGPUISD::INTERP",
>>  @@ -1386,17 +1386,20 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
>>   let isPseudo = 1, isCodeGenOnly =1 in {
>>   
>>   class RegisterLoad <ValueType vt> : InstR600 <0x0,
>>  -  (outs R600_Reg32:$dst), (ins FRAMEri:$addr, i32imm:$chan),
>>  -  "RegisterLoad $dst, $addr",
>>  -  [(set (vt R600_Reg32:$dst), (REGISTER_LOAD ADDRIndirect:$addr,
>>  -                               (i32 timm:$chan)))],
>>  +  (outs R600_Reg32:$dst), 
>>  +  (ins i32imm:$frameidx, R600_Reg32:$offset, i32imm:$chan),
>>  +  "RegisterLoad $dst, $offset",
>>  +  [(set (vt R600_Reg32:$dst), (REGISTER_LOAD (i32 tframeindex:$frameidx),
>>  +    (i32 R600_Reg32:$offset), (i32 timm:$chan)))],
>>     NullALU
>>   >;
>>   
>>   class RegisterStore <ValueType vt> : InstR600 <0x0,
>>  -  (outs), (ins R600_Reg32:$val, FRAMEri:$addr, i32imm:$chan),
>>  -  "RegisterStore_i32 $val, $addr",
>>  -  [(REGISTER_STORE (vt R600_Reg32:$val), ADDRIndirect:$addr, (i32 
> timm:$chan))],
>>  +  (outs),
>>  +  (ins R600_Reg32:$val, i32imm:$frameidx, R600_Reg32:$offset, 
> i32imm:$chan),
>>  +  "RegisterStore_i32 $val, $offset",
>>  +  [(REGISTER_STORE (vt R600_Reg32:$val), 
>>  +    (i32 tframeindex:$frameidx), (i32 R600_Reg32:$offset), (i32 
> timm:$chan))],
>>     NullALU
>>   >;
>>   
>>  -- 
>>  1.7.11.7
>> 
>>  _______________________________________________
>>  mesa-dev mailing list
>>  mesa-dev at lists.freedesktop.org
>>  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 


More information about the mesa-dev mailing list