[Mesa-dev] [PATCH] R600: Support for indirect addressing

Wed Jan 23 08:50:08 PST 2013

Nice work!
I have some formatting comments; otherwise the patch looks good.

----- Mail original -----
> De : "Tom Stellard" <tom at stellard.net>
> À : mesa-dev at lists.freedesktop.org
> Cc : Tom Stellard <thomas.stellard at amd.com>
> Envoyé le : Mardi 22 janvier 2013 21h15
> Objet : [Mesa-dev] [PATCH] R600: Support for indirect addressing
> 
> From: Tom Stellard <thomas.stellard at amd.com>
> 
> Only implemented for R600 so far.  SI is missing implementations of a
> few callbacks used by the Indirect Addressing pass and needs code to
> handle frame indices.
> 
> At the moment R600 only supports array sizes of 16 dwords or less.
> Register packing of vector types is currently disabled, which means that a
> vec4 is stored in T0_X, T1_X, T2_X, T3_X, rather than T0_XYZW. In order
> to correctly pack registers in all cases, we will need to implement an
> analysis pass for R600 that determines the correct vector width for each
> array.
> ---
> lib/Target/R600/AMDGPU.h                          |   1 +
> lib/Target/R600/AMDGPUFrameLowering.cpp           | 121 ++++++++
> lib/Target/R600/AMDGPUFrameLowering.h             |  44 +++
> lib/Target/R600/AMDGPUISelLowering.cpp            |   2 +
> lib/Target/R600/AMDGPUISelLowering.h              |   2 +
> lib/Target/R600/AMDGPUIndirectAddressing.cpp      | 319 ++++++++++++++++++++++
> lib/Target/R600/AMDGPUInstrInfo.cpp               |  11 +-
> lib/Target/R600/AMDGPUInstrInfo.h                 |  60 +++-
> lib/Target/R600/AMDGPUInstrInfo.td                |   8 +
> lib/Target/R600/AMDGPUInstructions.td             |  39 ++-
> lib/Target/R600/AMDGPURegisterInfo.cpp            |  71 +++++
> lib/Target/R600/AMDGPURegisterInfo.h              |   2 +
> lib/Target/R600/AMDGPURegisterInfo.td             |   8 +
> lib/Target/R600/AMDGPUTargetMachine.cpp           |   6 +
> lib/Target/R600/AMDGPUTargetMachine.h             |   2 +-
> lib/Target/R600/AMDILFrameLowering.cpp            |  47 ----
> lib/Target/R600/AMDILFrameLowering.h              |  40 ---
> lib/Target/R600/AMDILISelDAGToDAG.cpp             |  30 +-
> lib/Target/R600/CMakeLists.txt                    |   1 +
> lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp |   5 +-
> lib/Target/R600/R600Defines.h                     |   3 +
> lib/Target/R600/R600ISelLowering.cpp              | 181 +++++++++++-
> lib/Target/R600/R600ISelLowering.h                |   7 +-
> lib/Target/R600/R600InstrInfo.cpp                 | 122 +++++++++
> lib/Target/R600/R600InstrInfo.h                   |  32 +++
> lib/Target/R600/R600Instructions.td               |  15 +
> lib/Target/R600/R600MachineFunctionInfo.h         |   2 +
> lib/Target/R600/R600RegisterInfo.cpp              |  14 +
> lib/Target/R600/R600RegisterInfo.td               |  73 +++++
> lib/Target/R600/SIInstrInfo.cpp                   |  48 ++++
> lib/Target/R600/SIInstrInfo.h                     |  26 ++
> 31 files changed, 1229 insertions(+), 113 deletions(-)
> create mode 100644 lib/Target/R600/AMDGPUFrameLowering.cpp
> create mode 100644 lib/Target/R600/AMDGPUFrameLowering.h
> create mode 100644 lib/Target/R600/AMDGPUIndirectAddressing.cpp
> delete mode 100644 lib/Target/R600/AMDILFrameLowering.cpp
> delete mode 100644 lib/Target/R600/AMDILFrameLowering.h
> 
> diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
> index 1aa607f..bac01a3 100644
> --- a/lib/Target/R600/AMDGPU.h
> +++ b/lib/Target/R600/AMDGPU.h
> @@ -36,6 +36,7 @@ FunctionPass *createSIInsertWaits(TargetMachine &tm);
> // Passes common to R600 and SI
> Pass *createAMDGPUStructurizeCFGPass();
> FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
> +FunctionPass* createAMDGPUIndirectAddressingPass(TargetMachine &tm);
> 
> } // End namespace llvm
> 
> diff --git a/lib/Target/R600/AMDGPUFrameLowering.cpp 
> b/lib/Target/R600/AMDGPUFrameLowering.cpp
> new file mode 100644
> index 0000000..45b9c9e
> --- /dev/null
> +++ b/lib/Target/R600/AMDGPUFrameLowering.cpp
> @@ -0,0 +1,121 @@
> +//===----------------------- AMDGPUFrameLowering.cpp 
> ----------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//==-----------------------------------------------------------------------===//
> +//
> +// Interface to describe a layout of a stack frame on a AMDIL target machine
> +//
> +//===----------------------------------------------------------------------===//
> +#include "AMDGPUFrameLowering.h"
> +#include "AMDGPURegisterInfo.h"
> +#include "R600MachineFunctionInfo.h"
> +#include "llvm/CodeGen/MachineFrameInfo.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/Instructions.h"
> +
> +using namespace llvm;
> +AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
> +    int LAO, unsigned TransAl)
> +  : TargetFrameLowering(D, StackAl, LAO, TransAl) { }
> +
> +AMDGPUFrameLowering::~AMDGPUFrameLowering() { }
> +
> +unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) 
> const {
> +
> +  // XXX: Hardcoding to 1 for now.
> +  //
> +  // I think the StackWidth should stored as metadata associated with the
> +  // MachineFunction.  This metadata can either be added by a frontend, or
> +  // calculated by a R600 specific LLVM IR pass.
> +  //
> +  // The StackWidth determines how stack objects are laid out in memory.
> +  // For a vector stack variable, like: int4 stack[2], the data will be stored
> +  // in the following ways depending on the StackWidth.
> +  //
> +  // StackWidth = 1:
> +  //
> +  // T0.X = stack[0].x
> +  // T1.X = stack[0].y
> +  // T2.X = stack[0].z
> +  // T3.X = stack[0].w
> +  // T4.X = stack[1].x
> +  // T5.X = stack[1].y
> +  // T6.X = stack[1].z
> +  // T7.X = stack[1].w
> +  //
> +  // StackWidth = 2:
> +  //
> +  // T0.X = stack[0].x
> +  // T0.Y = stack[0].y
> +  // T1.X = stack[0].z
> +  // T1.Y = stack[0].w
> +  // T2.X = stack[1].x
> +  // T2.Y = stack[1].y
> +  // T3.X = stack[1].z
> +  // T3.Y = stack[1].w
> +  // 
> +  // StackWidth = 4:
> +  // T0.X = stack[0].x
> +  // T0.Y = stack[0].y
> +  // T0.Z = stack[0].z
> +  // T0.W = stack[0].w
> +  // T1.X = stack[1].x
> +  // T1.Y = stack[1].y
> +  // T1.Z = stack[1].z
> +  // T1.W = stack[1].w
> +  return 1;
> +}
> +
> +/// \returns The number of registers allocated for \p FI.
> +int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
> +                                         int FI) const {
> +  const MachineFrameInfo *MFI = MF.getFrameInfo();
> +  unsigned Offset = 0;
> +  int UpperBound = FI == -1 ? MFI->getNumObjects() : FI;
> +
> +  for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
> +    const AllocaInst *Alloca = MFI->getObjectAllocation(i);
> +    unsigned ArrayElements;
> +    const Type *AllocaType = Alloca->getAllocatedType();
> +    const Type *ElementType;
> +
> +    if (AllocaType->isArrayTy()) {
> +      ArrayElements = AllocaType->getArrayNumElements();
> +      ElementType = AllocaType->getArrayElementType();
> +    } else {
> +      ArrayElements = 1;
> +      ElementType = AllocaType;
> +    }
> +
> +    unsigned VectorElements;
> +    if (ElementType->isVectorTy()) {
> +      VectorElements = ElementType->getVectorNumElements();
> +    } else {
> +      VectorElements = 1;
> +    }
> +
> +    Offset += (VectorElements / getStackWidth(MF)) * ArrayElements;
> +  }
> +  return Offset;
> +}
> +
> +const TargetFrameLowering::SpillSlot *
> +AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
> +  NumEntries = 0;
> +  return 0;
> +}
> +void
> +AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
> +}
> +void
> +AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock 
> &MBB) const {
> +}

This line is longer than 80 columns; please wrap it.

> +
> +bool
> +AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
> +  return false;
> +}
> diff --git a/lib/Target/R600/AMDGPUFrameLowering.h 
> b/lib/Target/R600/AMDGPUFrameLowering.h
> new file mode 100644
> index 0000000..cf5742e
> --- /dev/null
> +++ b/lib/Target/R600/AMDGPUFrameLowering.h
> @@ -0,0 +1,44 @@
> +//===--------------------- AMDGPUFrameLowering.h ----------------*- C++ 
> -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief Interface to describe a layout of a stack frame on a AMDIL 
> target
> +/// machine.
> +//
> +//===----------------------------------------------------------------------===//
> +#ifndef AMDILFRAME_LOWERING_H
> +#define AMDILFRAME_LOWERING_H
> +
> +#include "llvm/CodeGen/MachineFunction.h"
> +#include "llvm/Target/TargetFrameLowering.h"
> +
> +namespace llvm {
> +
> +/// \brief Information about the stack frame layout on the AMDGPU targets.
> +///
> +/// It holds the direction of the stack growth, the known stack alignment on
> +/// entry to each function, and the offset to the locals area.
> +/// See TargetFrameInfo for more comments.
> +class AMDGPUFrameLowering : public TargetFrameLowering {
> +public:
> +  AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
> +                      unsigned TransAl = 1);
> +  virtual ~AMDGPUFrameLowering();
> +
> +  /// \returns The number of 32-bit sub-registers that are used when 
> storing
> +  /// values to the stack.
> +  virtual unsigned getStackWidth(const MachineFunction &MF) const;
> +  virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
> +  virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) 
> const;
> +  virtual void emitPrologue(MachineFunction &MF) const;
> +  virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock 
> &MBB) const;
> +  virtual bool hasFP(const MachineFunction &MF) const;
> +};
> +} // namespace llvm
> +#endif // AMDILFRAME_LOWERING_H
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp 
> b/lib/Target/R600/AMDGPUISelLowering.cpp
> index 309bcf5..a000689 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -414,5 +414,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned 
> Opcode) const {
>    NODE_NAME_CASE(INTERP_P0)
>    NODE_NAME_CASE(EXPORT)
>    NODE_NAME_CASE(CONST_ADDRESS)
> +  NODE_NAME_CASE(REGISTER_LOAD)
> +  NODE_NAME_CASE(REGISTER_STORE)
>    }
> }
> diff --git a/lib/Target/R600/AMDGPUISelLowering.h 
> b/lib/Target/R600/AMDGPUISelLowering.h
> index 9938c65..708c04a 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.h
> +++ b/lib/Target/R600/AMDGPUISelLowering.h
> @@ -124,6 +124,8 @@ enum {
>    INTERP_P0,
>    EXPORT,
>    CONST_ADDRESS,
> +  REGISTER_LOAD,
> +  REGISTER_STORE,
>    LAST_AMDGPU_ISD_NUMBER
> };
> 
> diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp 
> b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
> new file mode 100644
> index 0000000..4a5438c
> --- /dev/null
> +++ b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
> @@ -0,0 +1,319 @@
> +//===-- AMDGPUIndirectAddressing.cpp - Indirect Adressing Support 
> ---------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +///
> +/// Instructions can use indirect addressing to index the register file as if 
> it
> +/// were memory.  This pass lowers RegisterLoad and RegisterStore instructions
> +/// to either a COPY or a MOV that uses indirect addressing.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "AMDGPU.h"
> +#include "R600InstrInfo.h"
> +#include "R600MachineFunctionInfo.h"
> +#include "llvm/CodeGen/MachineFunction.h"
> +#include "llvm/CodeGen/MachineFunctionPass.h"
> +#include "llvm/CodeGen/MachineInstrBuilder.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/Support/Debug.h"
> +
> +using namespace llvm;
> +
> +namespace {
> +
> +class AMDGPUIndirectAddressingPass : public MachineFunctionPass {
> +
> +private:
> +  static char ID;
> +  const AMDGPUInstrInfo *TII;
> +
> +  bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const;
> +
> +public:
> +  AMDGPUIndirectAddressingPass(TargetMachine &tm) :
> +    MachineFunctionPass(ID),
> +    TII(static_cast<const AMDGPUInstrInfo*>(tm.getInstrInfo()))
> +    { }
> +
> +  virtual bool runOnMachineFunction(MachineFunction &MF);
> +
> +  const char *getPassName() const { return "R600 Handle indirect 
> addressing"; }
> +
> +};
> +
> +} // End anonymous namespace
> +
> +char AMDGPUIndirectAddressingPass::ID = 0;
> +
> +FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) {
> +  return new AMDGPUIndirectAddressingPass(tm);
> +}
> +
> +bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction 
> &MF) {
> +  MachineRegisterInfo &MRI = MF.getRegInfo();
> +
> +  unsigned IndirectBegin = TII->getIndirectIndexBegin(MF);
> +  unsigned IndirectEnd = TII->getIndirectIndexEnd(MF);
> +
> +  // The map keeps track of the indirect address that is represented by
> +  // each virtual register. The key is the register and the value is the
> +  // indirect address it uses.
> +  std::map<unsigned, unsigned> RegisterAddressMap;
> +
> +  // First pass - Lower all of the RegisterStore instructions and track which
> +  // registers are live.
> +  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
> +                                                      BB != BB_E; ++BB) {
> +    // This map keeps track of the current live indirect registers.
> +    // The key is the address and the value is the register
> +    std::map<unsigned, unsigned> LiveAddressRegisterMap;
> +    MachineBasicBlock &MBB = *BB;
> +
> +    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
> +                               I != MBB.end(); I = Next) {
> +      Next = llvm::next(I);
> +      MachineInstr &MI = *I;
> +
> +      if (!TII->isRegisterStore(MI)) {
> +        continue;
> +      }
> +
> +      // Lower RegisterStore
> +
> +      unsigned RegIndex = MI.getOperand(2).getImm();
> +      unsigned Channel = MI.getOperand(3).getImm();
> +      unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
> +      const TargetRegisterClass *IndirectStoreRegClass =
> +                  
> TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg());
> +
> +      if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
> +        // Direct register access.
> +        unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
> +
> +        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg)
> +                .addOperand(MI.getOperand(0));
> +
> +        RegisterAddressMap[DstReg] = Address;
> +        LiveAddressRegisterMap[Address] = DstReg;
> +      } else {
> +        // Indirect register access.
> +        MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I,
> +                                           MI.getOperand(0).getReg(), // Value
> +                                           Address,
> +                                           MI.getOperand(1).getReg()); // 
> Offset
> +        for (unsigned i = IndirectBegin; i <= IndirectEnd; ++i) {
> +          unsigned Addr = TII->calculateIndirectAddress(i, Channel);
> +          unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
> +          MOV.addReg(DstReg, RegState::Define | RegState::Implicit);
> +          RegisterAddressMap[DstReg] = Addr;
> +          LiveAddressRegisterMap[Addr] = DstReg;
> +        }
> +      }
> +      MI.eraseFromParent();
> +    }
> +
> +    // Update the live-ins of the succesor blocks
> +    for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(),
> +                                          SuccEnd = MBB.succ_end();
> +                                          SuccEnd != Succ; ++Succ) {
> +      std::map<unsigned, unsigned>::const_iterator Key, KeyEnd;
> +      for (Key = LiveAddressRegisterMap.begin(),
> +           KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) {
> +        (*Succ)->addLiveIn(Key->second);
> +      }
> +    }
> +  }
> +
> +  // Second pass - Lower the RegisterLoad instructions
> +  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
> +                                                      BB != BB_E; ++BB) {
> +    // Key is the address and the value is the register
> +    std::map<unsigned, unsigned> LiveAddressRegisterMap;
> +    MachineBasicBlock &MBB = *BB;
> +
> +    MachineBasicBlock::livein_iterator LI = MBB.livein_begin();
> +    while (LI != MBB.livein_end()) {
> +      std::vector<unsigned> PhiRegisters;
> +
> +      // Make sure this live in is used for indirect addressing
> +      if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) {
> +        ++LI;
> +        continue;
> +      }
> +
> +      unsigned Address = RegisterAddressMap[*LI];
> +      LiveAddressRegisterMap[Address] = *LI;
> +      PhiRegisters.push_back(*LI);
> +
> +      // Check if there are other live in registers which map to the same
> +      // indirect address.
> +      for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI),
> +                                              LE = MBB.livein_end();
> +                                              LJ != LE; ++LJ) {
> +        unsigned Reg = *LJ;
> +        if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) {
> +          continue;
> +        }
> +
> +        if (RegisterAddressMap[Reg] == Address) {
> +          if (!regHasExplicitDef(MRI, Reg)) {
> +            continue;
> +          }
> +          PhiRegisters.push_back(Reg);
> +        }
> +      }
> +
> +      if (PhiRegisters.size() == 1) {
> +        // We don't need to insert a Phi instruction, so we can just add 
> the
> +        // registers to the live list for the block.
> +        LiveAddressRegisterMap[Address] = *LI;
> +        MBB.removeLiveIn(*LI);
> +      } else {
> +        // We need to insert a PHI, because we have the same address being
> +        // written in multiple predecessor blocks.
> +        const TargetRegisterClass *PhiDstClass =
> +                  
> TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin()));
> +        unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass);
> +        MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(),
> +                                          MBB.findDebugLoc(MBB.begin()),
> +                                          TII->get(AMDGPU::PHI), PhiDstReg);
> +
> +        for (std::vector<unsigned>::const_iterator RI = 
> PhiRegisters.begin(),
> +                                                   RE = PhiRegisters.end();
> +                                                   RI != RE; ++RI) {
> +          unsigned Reg = *RI;
> +          MachineInstr *DefInst = MRI.getVRegDef(Reg);
> +          assert(DefInst);
> +          MachineBasicBlock *RegBlock = DefInst->getParent();
> +          Phi.addReg(Reg);
> +          Phi.addMBB(RegBlock);
> +          MBB.removeLiveIn(Reg);
> +        }
> +        RegisterAddressMap[PhiDstReg] = Address;
> +        LiveAddressRegisterMap[Address] = PhiDstReg;
> +      }
> +      LI = MBB.livein_begin();
> +    }
> +
> +    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
> +                               I != MBB.end(); I = Next) {
> +      Next = llvm::next(I);
> +      MachineInstr &MI = *I;
> +
> +      if (!TII->isRegisterLoad(MI)) {
> +        if (MI.getOpcode() == AMDGPU::PHI) {
> +          continue;
> +        }
> +        // Check for indirect register defs
> +        for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands();
> +                                 OpIdx < NumOperands; ++OpIdx) {
> +          MachineOperand &MO = MI.getOperand(OpIdx);
> +          if (MO.isReg() && MO.isDef() &&
> +              RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) 
> {
> +            unsigned Reg = MO.getReg();
> +            unsigned LiveAddress = RegisterAddressMap[Reg];
> +            // Chain the live-ins
> +            if (LiveAddressRegisterMap.find(LiveAddress) != 
> RegisterAddressMap.end()) {

Please wrap this line at 80 characters.

> +              MI.addOperand(MachineOperand::CreateReg(
> +                                  LiveAddressRegisterMap[LiveAddress],
> +                                  false, // isDef
> +                                  true,  // isImp
> +                                  true));  // isKill
> +            }
> +            LiveAddressRegisterMap[LiveAddress] = Reg;
> +          }
> +        }
> +        continue;
> +      }
> +
> +      const TargetRegisterClass *SuperIndirectRegClass =
> +                                                
> TII->getSuperIndirectRegClass();
> +      const TargetRegisterClass *IndirectLoadRegClass =
> +                                            
> TII->getIndirectAddrLoadRegClass();
> +      unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass);
> +
> +      unsigned RegIndex = MI.getOperand(2).getImm();
> +      unsigned Channel = MI.getOperand(3).getImm();
> +      unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
> +
> +      if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
> +        // Direct register access
> +        unsigned Reg = LiveAddressRegisterMap[Address];
> +        unsigned AddrReg = IndirectLoadRegClass->getRegister(Address);
> +
> +        if (regHasExplicitDef(MRI, Reg)) {
> +          // If the register we are reading from has an explicit def, then that
> +          // means it was written via a direct register access (i.e. COPY
> +          // or other instruction that doesn't use indirect addressing).  
> In
> +          // this case we know where the value has been stored, so we can just
> +          // issue a copy.
> +          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
> +                  MI.getOperand(0).getReg())
> +                  .addReg(Reg);
> +        } else {
> +          // If the register we are reading has an implicit def, then that
> +          // means it was written by an indirect register access (i.e. An
> +          // instruction that uses indirect addressing. 
> +          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
> +                   MI.getOperand(0).getReg())
> +                   .addReg(AddrReg);
> +        }
> +      } else {
> +        // Indirect register access
> +
> +        // Note on REQ_SEQUENCE instructons: You can't actually use the 
> register
> +        // it defines unless  you have an instruction that takes the defined
> +        // register class as an operand.
> +
> +        MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I),
> +                                              
> TII->get(AMDGPU::REG_SEQUENCE),
> +                                               IndirectReg);
> +        for (unsigned i = IndirectBegin; i <= IndirectEnd; ++i) {
> +          unsigned Addr = TII->calculateIndirectAddress(i, Channel);
> +          if (LiveAddressRegisterMap.find(Addr) == 
> LiveAddressRegisterMap.end()) {
> +            continue;
> +          }
> +          unsigned Reg = LiveAddressRegisterMap[Addr];
> +
> +          // We only need to use REG_SEQUENCE for explicit defs, since the
> +          // register coalescer won't do anything with the implicit defs.
> +          MachineInstr *DefInstr = MRI.getVRegDef(Reg);
> +          if (!DefInstr->getOperand(0).isReg() ||
> +              DefInstr->getOperand(0).getReg() != Reg) {
> +            continue;
> +          }
> +
> +          // Insert a REQ_SEQUENCE instruction to force the register allocator
> +          // to allocate the virtual register to the correct physical register.
> +          Sequence.addReg(LiveAddressRegisterMap[Addr]);
> +          Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr));
> +        }
> +        MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I,
> +                                           MI.getOperand(0).getReg(), // Value
> +                                           Address,
> +                                           MI.getOperand(1).getReg()); // 
> Offset
> +
> +
> +
> +        Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill);
> +
> +      }
> +      MI.eraseFromParent();
> +    }
> +  }
> +  return false;
> +}
> +
> +bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo 
> &MRI,
> +                                                  unsigned Reg) const {
> +  MachineInstr *DefInstr = MRI.getVRegDef(Reg);
> +  return DefInstr && DefInstr->getOperand(0).isReg() &&
> +         DefInstr->getOperand(0).getReg() == Reg;
> +}
> diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp 
> b/lib/Target/R600/AMDGPUInstrInfo.cpp
> index e42a46d..640707d 100644
> --- a/lib/Target/R600/AMDGPUInstrInfo.cpp
> +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
> @@ -234,7 +234,16 @@ AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const 
> TargetRegisterClass *RC) const {
>    // TODO: Implement this function
>    return true;
> }
> - 
> +
> +bool AMDGPUInstrInfo::isRegisterStore(const MachineInstr &MI) const {
> +  return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE;
> +}
> +
> +bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
> +  return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
> +}
> +
> +
> void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction 
> &MF,
>      DebugLoc DL) const {
>    MachineRegisterInfo &MRI = MF.getRegInfo();
> diff --git a/lib/Target/R600/AMDGPUInstrInfo.h 
> b/lib/Target/R600/AMDGPUInstrInfo.h
> index 32ac691..4ff63aa 100644
> --- a/lib/Target/R600/AMDGPUInstrInfo.h
> +++ b/lib/Target/R600/AMDGPUInstrInfo.h
> @@ -41,9 +41,10 @@ class MachineInstrBuilder;
> class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
> private:
>    const AMDGPURegisterInfo RI;
> -  TargetMachine &TM;
>    bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
>                            MachineBasicBlock &MBB) const;
> +protected:
> +  TargetMachine &TM;
> public:
>    explicit AMDGPUInstrInfo(TargetMachine &tm);
> 
> @@ -131,12 +132,66 @@ public:
>    bool isAExtLoadInst(llvm::MachineInstr *MI) const;
>    bool isStoreInst(llvm::MachineInstr *MI) const;
>    bool isTruncStoreInst(llvm::MachineInstr *MI) const;
> +  bool isRegisterStore(const MachineInstr &MI) const;
> +  bool isRegisterLoad(const MachineInstr &MI) const;
> +
> +//===---------------------------------------------------------------------===//
> +// Pure virtual funtions to be implemented by sub-classes.
> +//===---------------------------------------------------------------------===//
> 
>    virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
>                                         int64_t Imm) const = 0;
>    virtual unsigned getIEQOpcode() const = 0;
>    virtual bool isMov(unsigned opcode) const = 0;
> 
> +  /// \returns the smallest register index that will be accessed by an 
> indirect
> +  /// read or write.
> +  virtual unsigned getIndirectIndexBegin(const MachineFunction &MF) const = 
> 0;
> +
> +  /// \returns the largest register index that will be accessed by an 
> indirect
> +  /// read or write.
> +  virtual unsigned getIndirectIndexEnd(const MachineFunction &MF) const = 
> 0;
> +
> +  /// \brief Calculate the "Indirect Address" for the given 
> \p RegIndex and
> +  ///        \p Channel
> +  ///
> +  /// We model indirect addressing using a virtual address space that can be
> +  /// accesed with loads and stores.  The "Indirect Address" is the 
> memory
> +  /// address in this virtual address space that maps to the given \p 
> RegIndex
> +  /// and \p Channel.
> +  virtual unsigned calculateIndirectAddress(unsigned RegIndex,
> +                                            unsigned Channel) const = 0;
> +
> +  /// \returns The register class to be used for storing values to an
> +  /// "Indirect Address" .
> +  virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
> +                                                  unsigned SourceReg) const = 
> 0;
> +
> +  /// \returns The register class to be used for loading values from
> +  /// an "Indirect Address" .
> +  virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0;
> +
> +  /// \brief Build instruction(s) for an indirect register write.
> +  ///
> +  /// \returns The instruction that performs the indirect register write
> +  virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
> +                                    MachineBasicBlock::iterator I,
> +                                    unsigned ValueReg, unsigned Address,
> +                                    unsigned OffsetReg) const = 0;
> +
> +  /// \brief Build instruction(s) for an indirect register read.
> +  ///
> +  /// \returns The instruction that performs the indirect register read
> +  virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
> +                                    MachineBasicBlock::iterator I,
> +                                    unsigned ValueReg, unsigned Address,
> +                                    unsigned OffsetReg) const = 0;
> +
> +  /// \returns the register class whose sub registers are the set of all
> +  /// possible registers that can be used for indirect addressing.
> +  virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0;
> +
> +
>    /// \brief Convert the AMDIL MachineInstr to a supported ISA
>    /// MachineInstr
>    virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
> @@ -146,4 +201,7 @@ public:
> 
> } // End llvm namespace
> 
> +#define AMDGPU_FLAG_REGISTER_LOAD  (1UL << 63)
> +#define AMDGPU_FLAG_REGISTER_STORE (1UL << 62)
> +
> #endif // AMDGPUINSTRINFO_H
> diff --git a/lib/Target/R600/AMDGPUInstrInfo.td 
> b/lib/Target/R600/AMDGPUInstrInfo.td
> index 96368e8..b66ae87 100644
> --- a/lib/Target/R600/AMDGPUInstrInfo.td
> +++ b/lib/Target/R600/AMDGPUInstrInfo.td
> @@ -72,3 +72,11 @@ def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", 
> SDTIntBinOp,
> def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
> 
> def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
> +
> +def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
> +                          SDTypeProfile<1, 2, [SDTCisPtrTy<1>, 
> SDTCisInt<2>]>,
> +                          [SDNPHasChain, SDNPMayLoad]>;
> +
> +def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
> +                           SDTypeProfile<0, 3, [SDTCisPtrTy<1>, 
> SDTCisInt<2>]>,
> +                           [SDNPHasChain, SDNPMayStore]>;
> diff --git a/lib/Target/R600/AMDGPUInstructions.td 
> b/lib/Target/R600/AMDGPUInstructions.td
> index e634d20..3dee004 100644
> --- a/lib/Target/R600/AMDGPUInstructions.td
> +++ b/lib/Target/R600/AMDGPUInstructions.td
> @@ -13,8 +13,8 @@
> //===----------------------------------------------------------------------===//
> 
> class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> 
> : Instruction {
> -  field bits<16> AMDILOp = 0;
> -  field bits<3> Gen = 0;
> +  field bit isRegisterLoad = 0;
> +  field bit isRegisterStore = 0;
> 
>    let Namespace = "AMDGPU";
>    let OutOperandList = outs;
> @@ -22,8 +22,9 @@ class AMDGPUInst <dag outs, dag ins, string asm, 
> list<dag> pattern> : Instructio
>    let AsmString = asm;
>    let Pattern = pattern;
>    let Itinerary = NullALU;
> -  let TSFlags{42-40} = Gen;
> -  let TSFlags{63-48} = AMDILOp;
> +
> +  let TSFlags{63} = isRegisterLoad;
> +  let TSFlags{62} = isRegisterStore;
> }
> 
> class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> 
> pattern>
> @@ -101,7 +102,9 @@ def FP_ONE : PatLeaf <
>    [{return N->isExactlyValue(1.0);}]
> >;
> 
> -let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1  in {
> +let isCodeGenOnly = 1, isPseudo = 1 in {
> +
> +let usesCustomInserter = 1  in {
> 
> class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
>    (outs rc:$dst),
> @@ -131,7 +134,31 @@ def SHADER_TYPE : AMDGPUShaderInst <
>    [(int_AMDGPU_shader_type imm:$type)]
> >;
> 
> -} // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1
> +} // usesCustomInserter = 1
> +
> +multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
> +                    ComplexPattern addrPat> {
> +  def RegisterLoad : AMDGPUShaderInst <
> +    (outs dstClass:$dst),
> +    (ins addrClass:$addr, i32imm:$chan),
> +    "RegisterLoad $dst, $addr",
> +    [(set (i32 dstClass:$dst), (AMDGPUregister_load addrPat:$addr,
> +                                                    (i32 timm:$chan)))]
> +  > {
> +    let isRegisterLoad = 1;
> +  }
> +
> +  def RegisterStore : AMDGPUShaderInst <
> +    (outs),
> +    (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
> +    "RegisterStore $val, $addr",
> +    [(AMDGPUregister_store (i32 dstClass:$val), addrPat:$addr, (i32 
> timm:$chan))]
> +  > {
> +    let isRegisterStore = 1;
> +  }
> +}
> +
> +} // End isCodeGenOnly = 1, isPseudo = 1
> 
> /* Generic helper patterns for intrinsics */
> /* -------------------------------------- */
> diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp 
> b/lib/Target/R600/AMDGPURegisterInfo.cpp
> index eeafec8..19f89da 100644
> --- a/lib/Target/R600/AMDGPURegisterInfo.cpp
> +++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
> @@ -47,5 +47,76 @@ unsigned AMDGPURegisterInfo::getFrameRegister(const 
> MachineFunction &MF) const {
>    return 0;
> }
> 
> +unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
> +
> +  switch(IndirectIndex) {
> +  case 0: return AMDGPU::indirect_0;
> +  case 1: return AMDGPU::indirect_1;
> +  case 2: return AMDGPU::indirect_2;
> +  case 3: return AMDGPU::indirect_3;
> +  case 4: return AMDGPU::indirect_4;
> +  case 5: return AMDGPU::indirect_5;
> +  case 6: return AMDGPU::indirect_6;
> +  case 7: return AMDGPU::indirect_7;
> +  case 8: return AMDGPU::indirect_8;
> +  case 9: return AMDGPU::indirect_9;
> +  case 10: return AMDGPU::indirect_10;
> +  case 11: return AMDGPU::indirect_11;
> +  case 12: return AMDGPU::indirect_12;
> +  case 13: return AMDGPU::indirect_13;
> +  case 14: return AMDGPU::indirect_14;
> +  case 15: return AMDGPU::indirect_15;
> +  case 16: return AMDGPU::indirect_16;
> +  case 17: return AMDGPU::indirect_17;
> +  case 18: return AMDGPU::indirect_18;
> +  case 19: return AMDGPU::indirect_19;
> +  case 20: return AMDGPU::indirect_20;
> +  case 21: return AMDGPU::indirect_21;
> +  case 22: return AMDGPU::indirect_22;
> +  case 23: return AMDGPU::indirect_23;
> +  case 24: return AMDGPU::indirect_24;
> +  case 25: return AMDGPU::indirect_25;
> +  case 26: return AMDGPU::indirect_26;
> +  case 27: return AMDGPU::indirect_27;
> +  case 28: return AMDGPU::indirect_28;
> +  case 29: return AMDGPU::indirect_29;
> +  case 30: return AMDGPU::indirect_30;
> +  case 31: return AMDGPU::indirect_31;
> +  case 32: return AMDGPU::indirect_32;
> +  case 33: return AMDGPU::indirect_33;
> +  case 34: return AMDGPU::indirect_34;
> +  case 35: return AMDGPU::indirect_35;
> +  case 36: return AMDGPU::indirect_36;
> +  case 37: return AMDGPU::indirect_37;
> +  case 38: return AMDGPU::indirect_38;
> +  case 39: return AMDGPU::indirect_39;
> +  case 40: return AMDGPU::indirect_40;
> +  case 41: return AMDGPU::indirect_41;
> +  case 42: return AMDGPU::indirect_42;
> +  case 43: return AMDGPU::indirect_43;
> +  case 44: return AMDGPU::indirect_44;
> +  case 45: return AMDGPU::indirect_45;
> +  case 46: return AMDGPU::indirect_46;
> +  case 47: return AMDGPU::indirect_47;
> +  case 48: return AMDGPU::indirect_48;
> +  case 49: return AMDGPU::indirect_49;
> +  case 50: return AMDGPU::indirect_50;
> +  case 51: return AMDGPU::indirect_51;
> +  case 52: return AMDGPU::indirect_52;
> +  case 53: return AMDGPU::indirect_53;
> +  case 54: return AMDGPU::indirect_54;
> +  case 55: return AMDGPU::indirect_55;
> +  case 56: return AMDGPU::indirect_56;
> +  case 57: return AMDGPU::indirect_57;
> +  case 58: return AMDGPU::indirect_58;
> +  case 59: return AMDGPU::indirect_59;
> +  case 60: return AMDGPU::indirect_60;
> +  case 61: return AMDGPU::indirect_61;
> +  case 62: return AMDGPU::indirect_62;
> +  case 63: return AMDGPU::indirect_63;
> +  default: llvm_unreachable("indirect index out of range");
> +  }
> +}
> +
> #define GET_REGINFO_TARGET_DESC
> #include "AMDGPUGenRegisterInfo.inc"
> diff --git a/lib/Target/R600/AMDGPURegisterInfo.h 
> b/lib/Target/R600/AMDGPURegisterInfo.h
> index 76ee7ae..5007ff5 100644
> --- a/lib/Target/R600/AMDGPURegisterInfo.h
> +++ b/lib/Target/R600/AMDGPURegisterInfo.h
> @@ -56,6 +56,8 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
>                             RegScavenger *RS) const;
>    unsigned getFrameRegister(const MachineFunction &MF) const;
> 
> +  unsigned getIndirectSubReg(unsigned IndirectIndex) const;
> +
> };
> 
> } // End namespace llvm
> diff --git a/lib/Target/R600/AMDGPURegisterInfo.td 
> b/lib/Target/R600/AMDGPURegisterInfo.td
> index 8181e02..8c427fc 100644
> --- a/lib/Target/R600/AMDGPURegisterInfo.td
> +++ b/lib/Target/R600/AMDGPURegisterInfo.td
> @@ -16,6 +16,14 @@ let Namespace = "AMDGPU" in {
>    def sel_y : SubRegIndex;
>    def sel_z : SubRegIndex;
>    def sel_w : SubRegIndex;
> +
> +
> +foreach Index = 0-63 in {
> +  def indirect_#Index : SubRegIndex;
> +}
> +
> +def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
> +
> }
> 
> include "R600RegisterInfo.td"
> diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp 
> b/lib/Target/R600/AMDGPUTargetMachine.cpp
> index 7b069e7..dab3497 100644
> --- a/lib/Target/R600/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
> @@ -102,6 +102,12 @@ AMDGPUPassConfig::addPreISel() {
> bool AMDGPUPassConfig::addInstSelector() {
>    addPass(createAMDGPUPeepholeOpt(*TM));
>    addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
> +
> +  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
> +  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
> +    // The callbacks this pass uses are not implemented yet on SI.
> +    addPass(createAMDGPUIndirectAddressingPass(*TM));
> +  }
>    return false;
> }
> 
> diff --git a/lib/Target/R600/AMDGPUTargetMachine.h 
> b/lib/Target/R600/AMDGPUTargetMachine.h
> index 399e55c..5a1dcf4 100644
> --- a/lib/Target/R600/AMDGPUTargetMachine.h
> +++ b/lib/Target/R600/AMDGPUTargetMachine.h
> @@ -15,9 +15,9 @@
> #ifndef AMDGPU_TARGET_MACHINE_H
> #define AMDGPU_TARGET_MACHINE_H
> 
> +#include "AMDGPUFrameLowering.h"
> #include "AMDGPUInstrInfo.h"
> #include "AMDGPUSubtarget.h"
> -#include "AMDILFrameLowering.h"
> #include "AMDILIntrinsicInfo.h"
> #include "R600ISelLowering.h"
> #include "llvm/ADT/OwningPtr.h"
> diff --git a/lib/Target/R600/AMDILFrameLowering.cpp 
> b/lib/Target/R600/AMDILFrameLowering.cpp
> deleted file mode 100644
> index 9ad495a..0000000
> --- a/lib/Target/R600/AMDILFrameLowering.cpp
> +++ /dev/null
> @@ -1,47 +0,0 @@
> -//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ 
> -*-===//
> -//
> -//                     The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//==-----------------------------------------------------------------------===//
> -//
> -/// \file
> -/// \brief Interface to describe a layout of a stack frame on a AMDGPU 
> target
> -/// machine.
> -//
> -//===----------------------------------------------------------------------===//
> -#include "AMDILFrameLowering.h"
> -#include "llvm/CodeGen/MachineFrameInfo.h"
> -
> -using namespace llvm;
> -AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
> -    int LAO, unsigned TransAl)
> -  : TargetFrameLowering(D, StackAl, LAO, TransAl) {
> -}
> -
> -AMDGPUFrameLowering::~AMDGPUFrameLowering() {
> -}
> -
> -int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
> -                                         int FI) const {
> -  const MachineFrameInfo *MFI = MF.getFrameInfo();
> -  return MFI->getObjectOffset(FI);
> -}
> -
> -const TargetFrameLowering::SpillSlot *
> -AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
> -  NumEntries = 0;
> -  return 0;
> -}
> -void
> -AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
> -}
> -void
> -AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock 
> &MBB) const {
> -}
> -bool
> -AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
> -  return false;
> -}
> diff --git a/lib/Target/R600/AMDILFrameLowering.h 
> b/lib/Target/R600/AMDILFrameLowering.h
> deleted file mode 100644
> index 51337c3..0000000
> --- a/lib/Target/R600/AMDILFrameLowering.h
> +++ /dev/null
> @@ -1,40 +0,0 @@
> -//===--------------------- AMDILFrameLowering.h -----------------*- C++ 
> -*-===//
> -//
> -//                     The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -/// \file
> -/// \brief Interface to describe a layout of a stack frame on a AMDIL 
> target
> -/// machine.
> -//
> -//===----------------------------------------------------------------------===//
> -#ifndef AMDILFRAME_LOWERING_H
> -#define AMDILFRAME_LOWERING_H
> -
> -#include "llvm/CodeGen/MachineFunction.h"
> -#include "llvm/Target/TargetFrameLowering.h"
> -
> -namespace llvm {
> -
> -/// \brief Information about the stack frame layout on the AMDGPU targets.
> -///
> -/// It holds the direction of the stack growth, the known stack alignment on
> -/// entry to each function, and the offset to the locals area.
> -/// See TargetFrameInfo for more comments.
> -class AMDGPUFrameLowering : public TargetFrameLowering {
> -public:
> -  AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
> -                      unsigned TransAl = 1);
> -  virtual ~AMDGPUFrameLowering();
> -  virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
> -  virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) 
> const;
> -  virtual void emitPrologue(MachineFunction &MF) const;
> -  virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock 
> &MBB) const;
> -  virtual bool hasFP(const MachineFunction &MF) const;
> -};
> -} // namespace llvm
> -#endif // AMDILFRAME_LOWERING_H
> diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp 
> b/lib/Target/R600/AMDILISelDAGToDAG.cpp
> index 567b3e2..3f3e7e2 100644
> --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
> +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
> @@ -75,6 +75,7 @@ private:
>    bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& 
> Offset);
>    bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
>    bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue 
> &Offset);
> +  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue 
> &Offset);
> 
>    // Include the pieces autogenerated from the target description.
> #include "AMDGPUGenDAGISel.inc"
> @@ -161,16 +162,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
>    }
>    switch (Opc) {
>    default: break;
> -  case ISD::FrameIndex: {
> -    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
> -      unsigned int FI = FIN->getIndex();
> -      EVT OpVT = N->getValueType(0);
> -      unsigned int NewOpc = AMDGPU::COPY;
> -      SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
> -      return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
> -    }
> -    break;
> -  }
>    case ISD::ConstantFP:
>    case ISD::Constant: {
>      const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
> @@ -565,3 +556,22 @@ bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, 
> SDValue& Base,
> 
>    return true;
> }
> +
> +bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
> +                                            SDValue &Offset) {
> +  ConstantSDNode *C;
> +
> +  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
> +    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
> +    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
> +  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) 
> &&
> +            (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
> +    Base = Addr.getOperand(0);
> +    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
> +  } else {
> +    Base = Addr;
> +    Offset = CurDAG->getTargetConstant(0, MVT::i32);
> +  }
> +
> +  return true;
> +}
> diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
> index c49304f..8ef9f8c 100644
> --- a/lib/Target/R600/CMakeLists.txt
> +++ b/lib/Target/R600/CMakeLists.txt
> @@ -25,6 +25,7 @@ add_llvm_target(AMDGPUCodeGen
>    AMDILPeepholeOptimizer.cpp
>    AMDILSIDevice.cpp
>    AMDGPUAsmPrinter.cpp
> +  AMDGPUIndirectAddressing.cpp
>    AMDGPUMCInstLower.cpp
>    AMDGPUSubtarget.cpp
>    AMDGPUTargetMachine.cpp
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp 
> b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> index e76c6c8..fb17ab7 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> @@ -105,10 +105,7 @@ void AMDGPUInstPrinter::printOMOD(const MCInst *MI, 
> unsigned OpNo,
> 
> void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
>                                   raw_ostream &O) {
> -  const MCOperand &Op = MI->getOperand(OpNo);
> -  if (Op.getImm() != 0) {
> -    O << " + " << Op.getImm();
> -  }
> +  printIfSet(MI, OpNo, O, "+");
> }
> 
> void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
> diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h
> index e19eea3..16cfcf5 100644
> --- a/lib/Target/R600/R600Defines.h
> +++ b/lib/Target/R600/R600Defines.h
> @@ -49,6 +49,9 @@ namespace R600_InstFlag {
> #define HW_REG_MASK 0x1ff
> #define HW_CHAN_SHIFT 9
> 
> +#define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT)
> +#define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK)
> +
> namespace R600Operands {
>    enum Ops {
>      DST,
> diff --git a/lib/Target/R600/R600ISelLowering.cpp 
> b/lib/Target/R600/R600ISelLowering.cpp
> index 773122b..aad71d4 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -18,6 +18,7 @@
> #include "R600MachineFunctionInfo.h"
> #include "llvm/Argument.h"
> #include "llvm/Function.h"
> +#include "llvm/CodeGen/MachineFrameInfo.h"
> #include "llvm/CodeGen/MachineInstrBuilder.h"
> #include "llvm/CodeGen/MachineRegisterInfo.h"
> #include "llvm/CodeGen/SelectionDAG.h"
> @@ -71,11 +72,22 @@ R600TargetLowering::R600TargetLowering(TargetMachine 
> &TM) :
>    setOperationAction(ISD::SELECT, MVT::i32, Custom);
>    setOperationAction(ISD::SELECT, MVT::f32, Custom);
> 
> +  // Legalize loads and stores to the private address space.
> +  setOperationAction(ISD::LOAD, MVT::i32, Custom);
> +  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
> +  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
> +  setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
> +  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
> +  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
> +  setOperationAction(ISD::STORE, MVT::i8, Custom);
>    setOperationAction(ISD::STORE, MVT::i32, Custom);
> +  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
>    setOperationAction(ISD::STORE, MVT::v4i32, Custom);
> 
>    setOperationAction(ISD::LOAD, MVT::i32, Custom);
>    setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
> +  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
> +
>    setTargetDAGCombine(ISD::FP_ROUND);
>    setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
> 
> @@ -376,6 +388,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, 
> SelectionDAG &DAG) const
>    case ISD::STORE: return LowerSTORE(Op, DAG);
>    case ISD::LOAD: return LowerLOAD(Op, DAG);
>    case ISD::FPOW: return LowerFPOW(Op, DAG);
> +  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
>    case ISD::INTRINSIC_VOID: {
>      SDValue Chain = Op.getOperand(0);
>      unsigned IntrinsicID =
> @@ -516,6 +529,10 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
>      DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
>      return;
>    }
> +  case ISD::STORE:
> +    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
> +    Results.push_back(SDValue(Node, 0));
> +    return;
>    }
> }
> 
> @@ -583,6 +600,20 @@ SDValue 
> R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
>                       false, false, false, 0);
> }
> 
> +SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) 
> const {
> +
> +  MachineFunction &MF = DAG.getMachineFunction();
> +  const AMDGPUFrameLowering *TFL =
> +   static_cast<const 
> AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
> +
> +  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
> +  assert(FIN);
> +
> +  unsigned FrameIndex = FIN->getIndex();
> +  unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
> +  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
> +}
> +
> SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
>    DebugLoc DL = Op.getDebugLoc();
>    EVT VT = Op.getValueType();
> @@ -797,6 +828,61 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, 
> SelectionDAG &DAG) const {
>    return Cond;
> }
> 
> +/// LLVM generates byte-addressed pointers.  For indirect addressing, we need to
> +/// convert these pointers to a register index.  Each register holds
> +/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
> +/// \p StackWidth, which tells us how many of the 4 sub-registers will be 
> used
> +/// for indirect addressing.
> +SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
> +                                               unsigned StackWidth,
> +                                               SelectionDAG &DAG) const {
> +  unsigned SRLPad;
> +  switch(StackWidth) {
> +  case 1:
> +    SRLPad = 2;
> +    break;
> +  case 2:
> +    SRLPad = 3;
> +    break;
> +  case 4:
> +    SRLPad = 4;
> +    break;
> +  default: llvm_unreachable("Invalid stack width");
> +  }
> +
> +  return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
> +                     DAG.getConstant(SRLPad, MVT::i32));
> +}
> +
> +void R600TargetLowering::getStackAddress(unsigned StackWidth,
> +                                         unsigned ElemIdx,
> +                                         unsigned &Channel,
> +                                         unsigned &PtrIncr) const {
> +  switch (StackWidth) {
> +  default:
> +  case 1:
> +    Channel = 0;
> +    if (ElemIdx > 0) {
> +      PtrIncr = 1;
> +    } else {
> +      PtrIncr = 0;
> +    }
> +    break;
> +  case 2:
> +    Channel = ElemIdx % 2;
> +    if (ElemIdx == 2) {
> +      PtrIncr = 1;
> +    } else {
> +      PtrIncr = 0;
> +    }
> +    break;
> +  case 4:
> +    Channel = ElemIdx;
> +    PtrIncr = 0;
> +    break;
> +  }
> +}
> +
> SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const 
> {
>    DebugLoc DL = Op.getDebugLoc();
>    StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
> @@ -818,7 +904,52 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, 
> SelectionDAG &DAG) const {
>      }
>      return Chain;
>    }
> -  return SDValue();
> +
> +  EVT ValueVT = Value.getValueType();
> +
> +  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
> +    return SDValue();
> +  }
> +
> +  // Lowering for indirect addressing
> +
> +  const MachineFunction &MF = DAG.getMachineFunction();
> +  const AMDGPUFrameLowering *TFL = static_cast<const 
> AMDGPUFrameLowering*>(
> +                                        
> getTargetMachine().getFrameLowering());
> +  unsigned StackWidth = TFL->getStackWidth(MF);
> +
> +  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
> +
> +  if (ValueVT.isVector()) {
> +    unsigned NumElemVT = ValueVT.getVectorNumElements();
> +    EVT ElemVT = ValueVT.getVectorElementType();
> +    SDValue Stores[4];
> +
> +    assert(NumElemVT >= StackWidth && "Stack width cannot be 
> greater than "
> +                                      "vector width in load");
> +
> +    for (unsigned i = 0; i < NumElemVT; ++i) {
> +      unsigned Channel, PtrIncr;
> +      getStackAddress(StackWidth, i, Channel, PtrIncr);
> +      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
> +                        DAG.getConstant(PtrIncr, MVT::i32));
> +      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
> +                                 Value, DAG.getConstant(i, MVT::i32));
> +
> +      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
> +                              Chain, Elem, Ptr,
> +                              DAG.getTargetConstant(Channel, MVT::i32));
> +    }
> +     Chain =  DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
> +   } else {
> +    if (ValueVT == MVT::i8) {
> +      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
> +    }
> +    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, 
> Value, Ptr,
> +    DAG.getTargetConstant(0, MVT::i32)); // Channel 
> +  }
> +
> +  return Chain;
> }
> 
> // return (512 + (kc_bank << 12)
> @@ -907,7 +1038,53 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, 
> SelectionDAG &DAG) const
>      return DAG.getMergeValues(MergedValues, 2, DL);
>    }
> 
> -  return SDValue();
> +  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
> +    return SDValue();
> +  }
> +
> +  // Lowering for indirect addressing
> +  const MachineFunction &MF = DAG.getMachineFunction();
> +  const AMDGPUFrameLowering *TFL = static_cast<const 
> AMDGPUFrameLowering*>(
> +                                        
> getTargetMachine().getFrameLowering());
> +  unsigned StackWidth = TFL->getStackWidth(MF);
> +
> +  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
> +
> +  if (VT.isVector()) {
> +    unsigned NumElemVT = VT.getVectorNumElements();
> +    EVT ElemVT = VT.getVectorElementType();
> +    SDValue Loads[4];
> +
> +    assert(NumElemVT >= StackWidth && "Stack width cannot be 
> greater than "
> +                                      "vector width in load");
> +
> +    for (unsigned i = 0; i < NumElemVT; ++i) {
> +      unsigned Channel, PtrIncr;
> +      getStackAddress(StackWidth, i, Channel, PtrIncr);
> +      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
> +                        DAG.getConstant(PtrIncr, MVT::i32));
> +      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
> +                             Chain, Ptr,
> +                             DAG.getTargetConstant(Channel, MVT::i32),
> +                             Op.getOperand(2));
> +    }
> +    for (unsigned i = NumElemVT; i < 4; ++i) {
> +      Loads[i] = DAG.getUNDEF(ElemVT);
> +    }
> +    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
> +    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
> +  } else {
> +    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
> +                              Chain, Ptr,
> +                              DAG.getTargetConstant(0, MVT::i32), // Channel
> +                              Op.getOperand(2));
> +  }
> +
> +  SDValue Ops[2];
> +  Ops[0] = LoweredLoad;
> +  Ops[1] = Chain;
> +
> +  return DAG.getMergeValues(Ops, 2, DL);
> }
> 
> SDValue R600TargetLowering::LowerFPOW(SDValue Op,
> diff --git a/lib/Target/R600/R600ISelLowering.h 
> b/lib/Target/R600/R600ISelLowering.h
> index c141d50..afa3897 100644
> --- a/lib/Target/R600/R600ISelLowering.h
> +++ b/lib/Target/R600/R600ISelLowering.h
> @@ -64,7 +64,12 @@ private:
>    SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
>    SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
>    SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
> -  
> +  SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
> +
> +  SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
> +                                          SelectionDAG &DAG) const;
> +  void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
> +                       unsigned &Channel, unsigned &PtrIncr) const;
>    bool isZero(SDValue Op) const;
> };
> 
> diff --git a/lib/Target/R600/R600InstrInfo.cpp 
> b/lib/Target/R600/R600InstrInfo.cpp
> index f7daaf8..0fefbab 100644
> --- a/lib/Target/R600/R600InstrInfo.cpp
> +++ b/lib/Target/R600/R600InstrInfo.cpp
> @@ -16,8 +16,12 @@
> #include "AMDGPUTargetMachine.h"
> #include "AMDGPUSubtarget.h"
> #include "R600Defines.h"
> +#include "R600MachineFunctionInfo.h"
> #include "R600RegisterInfo.h"
> #include "llvm/CodeGen/MachineInstrBuilder.h"
> +#include "llvm/CodeGen/MachineFrameInfo.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/Instructions.h"
> 
> #define GET_INSTRINFO_CTOR
> #include "AMDGPUGenDFAPacketizer.inc"
> @@ -464,6 +468,124 @@ unsigned int R600InstrInfo::getInstrLatency(const 
> InstrItineraryData *ItinData,
>    return 2;
> }
> 
> +unsigned R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) 
> const {
> +  const MachineRegisterInfo &MRI = MF.getRegInfo();
> +  const R600MachineFunctionInfo *MFI = 
> MF.getInfo<R600MachineFunctionInfo>();
> +  unsigned Offset = 0;
> +
> +  if (MRI.livein_empty() && MFI->ReservedRegs.empty()) {
> +    return 0;
> +  }
> +
> +  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
> +                                            LE = MRI.livein_end();
> +                                            LI != LE; ++LI) {
> +    Offset = std::max(Offset,
> +                      
> (unsigned)GET_REG_INDEX(RI.getEncodingValue(LI->first)));
> +  }
> +
> +  for (std::vector<unsigned>::const_iterator RRI = 
> MFI->ReservedRegs.begin(),
> +                                             RRE = MFI->ReservedRegs.end();
> +                                             RRI != RRE; ++RRI) {
> +    Offset = std::max(Offset,
> +                     (unsigned GET_REG_INDEX(RI.getEncodingValue(*RRI))));
> +  }
> +
> +  return Offset + 1;
> +}
> +
> +unsigned R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) 
> const {
> +  unsigned Offset = 0;
> +  const MachineFrameInfo *MFI = MF.getFrameInfo();
> +
> +  // Variable sized objects are not supported
> +  assert(!MFI->hasVarSizedObjects());
> +
> +  // Only one stack object is supported at the moment
> +//  assert(MFI->getNumObjects() <= 1);
> +
> +  if (MFI->getNumObjects() == 0) {
> +    return 0;
> +  }
> +
> +  Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
> +
> +  return getIndirectIndexBegin(MF) + Offset;
> +}
> +
> +std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
> +                                             const MachineFunction &MF) 
> const {
> +  const AMDGPUFrameLowering *TFL =
> +                 static_cast<const 
> AMDGPUFrameLowering*>(TM.getFrameLowering());
> +  unsigned StackWidth = TFL->getStackWidth(MF);
> +  unsigned End = getIndirectIndexEnd(MF);
> +
> +  std::vector<unsigned> Regs;
> +
> +  for (unsigned Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
> +    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
> +    Regs.push_back(SuperReg);
> +    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
> +      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + 
> Chan);
> +      Regs.push_back(Reg);
> +    }
> +  }
> +  return Regs;
> +}
> +
> +unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
> +                                                 unsigned Channel) const {
> +  return (4 * RegIndex) + Channel;
> +}
> +
> +const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
> +                                                     unsigned SourceReg) const 
> {
> +  return &AMDGPU::R600_TReg32RegClass;
> +}
> +
> +const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
> +  return &AMDGPU::TRegMemRegClass;
> +}
> +
> +MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
> +                                       MachineBasicBlock::iterator I,
> +                                       unsigned ValueReg, unsigned Address,
> +                                       unsigned OffsetReg) const {
> +  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
> +  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
> +                                               AMDGPU::AR_X, OffsetReg);
> +  setImmOperand(MOVA, R600Operands::WRITE, 0);
> +
> +  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
> +                                      AddrReg, ValueReg)
> +                                      .addReg(AMDGPU::AR_X, 
> RegState::Implicit);
> +  setImmOperand(Mov, R600Operands::DST_REL, 1);
> +  return Mov;
> +}
> +
> +MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
> +                                       MachineBasicBlock::iterator I,
> +                                       unsigned ValueReg, unsigned Address,
> +                                       unsigned OffsetReg) const {
> +  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
> +  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
> +                                                       AMDGPU::AR_X,
> +                                                       OffsetReg);
> +  setImmOperand(MOVA, R600Operands::WRITE, 0);
> +  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
> +                                      ValueReg,
> +                                      AddrReg)
> +                                      .addReg(AMDGPU::AR_X, 
> RegState::Implicit);
> +  setImmOperand(Mov, R600Operands::SRC0_REL, 1);
> +
> +  return Mov;
> +}
> +
> +const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
> +  return &AMDGPU::IndirectRegRegClass;
> +}
> +
> +
> MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock 
> &MBB,
>                                                    MachineBasicBlock::iterator 
> I,
>                                                    unsigned Opcode,
> diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
> index 6bb0ca9..a50d18e 100644
> --- a/lib/Target/R600/R600InstrInfo.h
> +++ b/lib/Target/R600/R600InstrInfo.h
> @@ -114,6 +114,38 @@ namespace llvm {
>    virtual int getInstrLatency(const InstrItineraryData *ItinData,
>                                SDNode *Node) const { return 1;}
> 
> +  /// \returns a list of all the registers that may be accessed using 
> indirect
> +  /// addressing.
> +  std::vector<unsigned> getIndirectReservedRegs(const MachineFunction 
> &MF) const;
> +
> +  virtual unsigned getIndirectIndexBegin(const MachineFunction &MF) const;
> +
> +  virtual unsigned getIndirectIndexEnd(const MachineFunction &MF) const;
> +
> +
> +  virtual unsigned calculateIndirectAddress(unsigned RegIndex,
> +                                            unsigned Channel) const;
> +
> +  virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
> +                                                      unsigned SourceReg) 
> const;
> +
> +  virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
> +
> +  virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
> +                                  MachineBasicBlock::iterator I,
> +                                  unsigned ValueReg, unsigned Address,
> +                                  unsigned OffsetReg) const;
> +
> +  virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
> +                                  MachineBasicBlock::iterator I,
> +                                  unsigned ValueReg, unsigned Address,
> +                                  unsigned OffsetReg) const;
> +
> +  virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
> +
> +
> +  /// buildDefaultInstruction - This function returns a MachineInstr with
> +  /// all the instruction modifiers initialized to their default values.
>    /// You can use this function to avoid manually specifying each instruction
>    /// modifier operand when building a new instruction.
>    ///
> diff --git a/lib/Target/R600/R600Instructions.td 
> b/lib/Target/R600/R600Instructions.td
> index 04b83bc..a6a0213 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -91,11 +91,16 @@ def UP : InstFlag <"printUpdatePred">;
> // default to 0.
> def LAST : InstFlag<"printLast", 1>;
> 
> +def FRAMEri : Operand<iPTR> {
> +  let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
> +}
> +
> def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], 
> []>;
> def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], 
> []>;
> def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], 
> []>;
> def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, 
> "SelectGlobalValueConstantOffset", [], []>;
> def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, 
> "SelectGlobalValueVariableOffset", [], []>;
> +def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", 
> [], []>;
> 
> class R600ALU_Word0 {
>    field bits<32> Word0;
> @@ -1221,6 +1226,10 @@ let Predicates = [isEGorCayman] in {
>    defm DOT4_eg : DOT4_Common<0xBE>;
>    defm CUBE_eg : CUBE_Common<0xC0>;
> 
> +let hasSideEffects = 1 in {
> +  def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
> +}
> +
>    def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, 
> EXP_IEEE_eg>;
> 
>    def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
> @@ -1471,6 +1480,12 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
> 
> }
> 
> +//===----------------------------------------------------------------------===//
> +// Register loads and stores - for indirect addressing
> +//===----------------------------------------------------------------------===//
> +
> +defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
> +
> let Predicates = [isCayman] in {
> 
> let isVector = 1 in { 
> diff --git a/lib/Target/R600/R600MachineFunctionInfo.h 
> b/lib/Target/R600/R600MachineFunctionInfo.h
> index 91f9de2..ebb2584 100644
> --- a/lib/Target/R600/R600MachineFunctionInfo.h
> +++ b/lib/Target/R600/R600MachineFunctionInfo.h
> @@ -13,6 +13,7 @@
> #ifndef R600MACHINEFUNCTIONINFO_H
> #define R600MACHINEFUNCTIONINFO_H
> 
> +#include "llvm/ADT/BitVector.h"
> #include "llvm/CodeGen/MachineFunction.h"
> #include "llvm/CodeGen/SelectionDAG.h"
> #include <vector>
> @@ -24,6 +25,7 @@ class R600MachineFunctionInfo : public MachineFunctionInfo {
> public:
>    R600MachineFunctionInfo(const MachineFunction &MF);
>    std::vector<unsigned> ReservedRegs;
> +  std::vector<unsigned> IndirectRegs;
>    SDNode *Outputs[16];
>    bool HasLinearInterpolation;
>    bool HasPerspectiveInterpolation;
> diff --git a/lib/Target/R600/R600RegisterInfo.cpp 
> b/lib/Target/R600/R600RegisterInfo.cpp
> index 0441e4a..ecd0403 100644
> --- a/lib/Target/R600/R600RegisterInfo.cpp
> +++ b/lib/Target/R600/R600RegisterInfo.cpp
> @@ -15,6 +15,7 @@
> #include "R600RegisterInfo.h"
> #include "AMDGPUTargetMachine.h"
> #include "R600Defines.h"
> +#include "R600InstrInfo.h"
> #include "R600MachineFunctionInfo.h"
> 
> using namespace llvm;
> @@ -44,11 +45,23 @@ BitVector R600RegisterInfo::getReservedRegs(const 
> MachineFunction &MF) const {
>    Reserved.set(AMDGPU::PRED_SEL_ZERO);
>    Reserved.set(AMDGPU::PRED_SEL_ONE);
> 
> +  for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
> +                        E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
> +    Reserved.set(*I);
> +  }
> +
>    for (std::vector<unsigned>::const_iterator I = 
> MFI->ReservedRegs.begin(),
>                                      E = MFI->ReservedRegs.end(); I != E; 
> ++I) {
>      Reserved.set(*I);
>    }
> 
> +  const R600InstrInfo *RII = static_cast<const R600InstrInfo*>(&TII);
> +  std::vector<unsigned> IndirectRegs = 
> RII->getIndirectReservedRegs(MF);
> +  for (std::vector<unsigned>::iterator I = IndirectRegs.begin(),
> +                                       E = IndirectRegs.end();
> +                                       I != E; ++I) {
> +    Reserved.set(*I);
> +  }
>    return Reserved;
> }
> 
> @@ -83,3 +96,4 @@ unsigned R600RegisterInfo::getSubRegFromChannel(unsigned 
> Channel) const {
>      case 3: return AMDGPU::sel_w;
>    }
> }
> +
> diff --git a/lib/Target/R600/R600RegisterInfo.td 
> b/lib/Target/R600/R600RegisterInfo.td
> index 993fefc..e119e7a 100644
> --- a/lib/Target/R600/R600RegisterInfo.td
> +++ b/lib/Target/R600/R600RegisterInfo.td
> @@ -27,6 +27,12 @@ foreach Index = 0-127 in {
>    foreach Chan = [ "X", "Y", "Z", "W" ] 
> in {
>      // 32-bit Temporary Registers
>      def T#Index#_#Chan : R600RegWithChan 
> <"T"#Index#"."#Chan, Index, Chan>;
> +
> +    // Indirect addressing offset registers
> +    def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + 
> AR.x)."#Chan,
> +                                              Index, Chan>;
> +    def TRegMem#Index#_#Chan : R600RegWithChan 
> <"T"#Index#"."#Chan, Index,
> +                                                Chan>;
>    }
>    // 128-bit Temporary Registers
>    def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
> @@ -57,6 +63,7 @@ def PREDICATE_BIT : R600Reg<"PredicateBit", 
> 0>;
> def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
> def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
> def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
> +def AR_X : R600Reg<"AR.x", 0>;
> 
> def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
>                            (add (sequence "ArrayBase%u", 448, 
> 464))>;
> @@ -66,6 +73,17 @@ def ALU_CONST : R600Reg<"CBuf", 0>;
> // interpolation param reference, SRCx_SEL contains index
> def ALU_PARAM : R600Reg<"Param", 0>;
> 
> +let isAllocatable = 0 in {
> +
> +def R600_Addr : RegisterClass <"AMDGPU", [i32], 127,
> +                          (add (interleave
> +                                  (interleave (sequence "Addr%u_X", 
> 0, 127),
> +                                              (sequence "Addr%u_Z", 
> 0, 127)),
> +                                  (interleave (sequence "Addr%u_Y", 
> 0, 127),
> +                                              (sequence "Addr%u_W", 
> 0, 127))))>;
> +
> +} // End isAllocatable = 0
> +
> def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
>                                     (add (sequence "T%u_X", 0, 
> 127))>;
> 
> @@ -85,6 +103,7 @@ def R600_TReg32 : RegisterClass <"AMDGPU", [f32, 
> i32], 32,
> def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
>      R600_TReg32,
>      R600_ArrayBase,
> +    R600_Addr,
>      ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
>      ALU_CONST, ALU_PARAM
>      )>;
> @@ -99,3 +118,57 @@ def R600_Reg128 : RegisterClass<"AMDGPU", 
> [v4f32, v4i32], 128,
>                                  (add (sequence "T%u_XYZW", 0, 
> 127))> {
>    let CopyCost = -1;
> }
> +
> +//===----------------------------------------------------------------------===//
> +// Register classes for indirect addressing
> +//===----------------------------------------------------------------------===//
> +
> +// Super register for all the Indirect Registers.  This register class is used
> +// by the REG_SEQUENCE instruction to specify the registers to use for direct
> +// reads / writes which may be written / read by an indirect address.
> +class IndirectSuper<string n, list<Register> subregs> :
> +    RegisterWithSubRegs<n, subregs> {
> +  let Namespace = "AMDGPU";
> +  let SubRegIndices =
> + [indirect_0,indirect_1,indirect_2,indirect_3,indirect_4,indirect_5,indirect_6,
> + indirect_7,indirect_8,indirect_9,indirect_10,indirect_11,indirect_12,
> + indirect_13,indirect_14,indirect_15,indirect_16,indirect_17,indirect_18,
> + indirect_19,indirect_20,indirect_21,indirect_22,indirect_23,indirect_24,
> + indirect_25,indirect_26,indirect_27,indirect_28,indirect_29,indirect_30,
> + indirect_31,indirect_32,indirect_33,indirect_34,indirect_35,indirect_36,
> + indirect_37,indirect_38,indirect_39,indirect_40,indirect_41,indirect_42,
> + indirect_43,indirect_44,indirect_45,indirect_46,indirect_47,indirect_48,
> + indirect_49,indirect_50,indirect_51,indirect_52,indirect_53,indirect_54,
> + indirect_55,indirect_56,indirect_57,indirect_58,indirect_59,indirect_60,
> + indirect_61,indirect_62,indirect_63];
> +
> +}
> +
> +def IndirectSuperReg : IndirectSuper<"Indirect",
> +  [TRegMem0_X,TRegMem0_Y,TRegMem0_Z,TRegMem0_W,
> +   TRegMem1_X,TRegMem1_Y,TRegMem1_Z,TRegMem1_W,
> +   TRegMem2_X,TRegMem2_Y,TRegMem2_Z,TRegMem2_W,
> +   TRegMem3_X,TRegMem3_Y,TRegMem3_Z,TRegMem3_W,
> +   TRegMem4_X,TRegMem4_Y,TRegMem4_Z,TRegMem4_W,
> +   TRegMem5_X,TRegMem5_Y,TRegMem5_Z,TRegMem5_W,
> +   TRegMem6_X,TRegMem6_Y,TRegMem6_Z,TRegMem6_W,
> +   TRegMem7_X,TRegMem7_Y,TRegMem7_Z,TRegMem7_W,
> +   TRegMem8_X,TRegMem8_Y,TRegMem8_Z,TRegMem8_W,
> +   TRegMem9_X,TRegMem9_Y,TRegMem9_Z,TRegMem9_W,
> +   TRegMem10_X,TRegMem10_Y,TRegMem10_Z,TRegMem10_W,
> +   TRegMem11_X,TRegMem11_Y,TRegMem11_Z,TRegMem11_W,
> +   TRegMem12_X,TRegMem12_Y,TRegMem12_Z,TRegMem12_W,
> +   TRegMem13_X,TRegMem13_Y,TRegMem13_Z,TRegMem13_W,
> +   TRegMem14_X,TRegMem14_Y,TRegMem14_Z,TRegMem14_W,
> +   TRegMem15_X,TRegMem15_Y,TRegMem15_Z,TRegMem15_W]
> +>;
> +
> +def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add 
> IndirectSuperReg)>;
> +
> +// This register class defines the registers that are the storage units for
> +// the "Indirect Addressing" pseudo memory space.
> +def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32,
> +  (add (interleave
> +        (interleave (sequence "TRegMem%u_X", 0, 16), (sequence 
> "TRegMem%u_Z", 0, 16)),
> +        (interleave (sequence "TRegMem%u_Y", 0, 16), (sequence 
> "TRegMem%u_W", 0, 16))))
> +>;

The register class definitions above exceed 80 columns; please wrap these lines at 80 characters.

> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index 4aa4a45..f10fd1f 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -87,3 +87,51 @@ bool
> SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
>    return RC != &AMDGPU::EXECRegRegClass;
> }
> +
> +//===----------------------------------------------------------------------===//
> +// Indirect addressing callbacks
> +//===----------------------------------------------------------------------===//
> +
> +unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
> +                                                 unsigned Channel) const {
> +  assert(Channel == 0);
> +  return RegIndex;
> +}
> +
> +
> +unsigned SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) 
> const {
> +  llvm_unreachable("Unimplemented");
> +}
> +
> +unsigned SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const 
> {
> +  llvm_unreachable("Unimplemented");
> +}
> +
> +const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass(
> +                                                     unsigned SourceReg) const 
> {
> +  llvm_unreachable("Unimplemented");
> +}
> +
> +const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const {
> +  llvm_unreachable("Unimplemented");
> +}
> +
> +MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
> +                                   MachineBasicBlock *MBB,
> +                                   MachineBasicBlock::iterator I,
> +                                   unsigned ValueReg,
> +                                   unsigned Address, unsigned OffsetReg) const 
> {
> +  llvm_unreachable("Unimplemented");
> +}
> +
> +MachineInstrBuilder SIInstrInfo::buildIndirectRead(
> +                                   MachineBasicBlock *MBB,
> +                                   MachineBasicBlock::iterator I,
> +                                   unsigned ValueReg,
> +                                   unsigned Address, unsigned OffsetReg) const 
> {
> +  llvm_unreachable("Unimplemented");
> +}
> +
> +const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const {
> +  llvm_unreachable("Unimplemented");
> +}
> diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
> index 783cd9f..606ba77 100644
> --- a/lib/Target/R600/SIInstrInfo.h
> +++ b/lib/Target/R600/SIInstrInfo.h
> @@ -48,6 +48,32 @@ public:
>    virtual bool isMov(unsigned Opcode) const;
> 
>    virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
> +
> +  virtual unsigned getIndirectIndexBegin(const MachineFunction &MF) const;
> +
> +  virtual unsigned getIndirectIndexEnd(const MachineFunction &MF) const;
> +
> +  virtual unsigned calculateIndirectAddress(unsigned RegIndex,
> +                                            unsigned Channel) const;
> +
> +  virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
> +                                                      unsigned SourceReg) 
> const;
> +
> +  virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
> +
> +  virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
> +                                                 MachineBasicBlock::iterator I,
> +                                                 unsigned ValueReg,
> +                                                 unsigned Address,
> +                                                 unsigned OffsetReg) const;
> +
> +  virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
> +                                                MachineBasicBlock::iterator I,
> +                                                unsigned ValueReg,
> +                                                unsigned Address,
> +                                                unsigned OffsetReg) const;
> +
> +  virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
>    };
> 
> } // End namespace llvm
> -- 
> 1.7.11.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>