[Mesa-dev] [PATCH] R600: Support for indirect addressing
Vincent Lejeune
vljn at ovi.com
Wed Jan 23 08:50:08 PST 2013
Nice work!
I have some formatting comments; otherwise the patch looks good.
----- Mail original -----
> De : "Tom Stellard" <tom at stellard.net>
> À : mesa-dev at lists.freedesktop.org
> Cc : Tom Stellard <thomas.stellard at amd.com>
> Envoyé le : Mardi 22 janvier 2013 21h15
> Objet : [Mesa-dev] [PATCH] R600: Support for indirect addressing
>
> From: Tom Stellard <thomas.stellard at amd.com>
>
> Only implemented for R600 so far. SI is missing implementations of a
> few callbacks used by the Indirect Addressing pass and needs code to
> handle frame indices.
>
> At the moment R600 only supports array sizes of 16 dwords or less.
> Register packing of vector types is currently disabled, which means that a
> vec4 is stored in T0_X, T1_X, T2_X, T3_X, rather than T0_XYZW. In order
> to correctly pack registers in all cases, we will need to implement an
> analysis pass for R600 that determines the correct vector width for each
> array.
> ---
> lib/Target/R600/AMDGPU.h | 1 +
> lib/Target/R600/AMDGPUFrameLowering.cpp | 121 ++++++++
> lib/Target/R600/AMDGPUFrameLowering.h | 44 +++
> lib/Target/R600/AMDGPUISelLowering.cpp | 2 +
> lib/Target/R600/AMDGPUISelLowering.h | 2 +
> lib/Target/R600/AMDGPUIndirectAddressing.cpp | 319 ++++++++++++++++++++++
> lib/Target/R600/AMDGPUInstrInfo.cpp | 11 +-
> lib/Target/R600/AMDGPUInstrInfo.h | 60 +++-
> lib/Target/R600/AMDGPUInstrInfo.td | 8 +
> lib/Target/R600/AMDGPUInstructions.td | 39 ++-
> lib/Target/R600/AMDGPURegisterInfo.cpp | 71 +++++
> lib/Target/R600/AMDGPURegisterInfo.h | 2 +
> lib/Target/R600/AMDGPURegisterInfo.td | 8 +
> lib/Target/R600/AMDGPUTargetMachine.cpp | 6 +
> lib/Target/R600/AMDGPUTargetMachine.h | 2 +-
> lib/Target/R600/AMDILFrameLowering.cpp | 47 ----
> lib/Target/R600/AMDILFrameLowering.h | 40 ---
> lib/Target/R600/AMDILISelDAGToDAG.cpp | 30 +-
> lib/Target/R600/CMakeLists.txt | 1 +
> lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 5 +-
> lib/Target/R600/R600Defines.h | 3 +
> lib/Target/R600/R600ISelLowering.cpp | 181 +++++++++++-
> lib/Target/R600/R600ISelLowering.h | 7 +-
> lib/Target/R600/R600InstrInfo.cpp | 122 +++++++++
> lib/Target/R600/R600InstrInfo.h | 32 +++
> lib/Target/R600/R600Instructions.td | 15 +
> lib/Target/R600/R600MachineFunctionInfo.h | 2 +
> lib/Target/R600/R600RegisterInfo.cpp | 14 +
> lib/Target/R600/R600RegisterInfo.td | 73 +++++
> lib/Target/R600/SIInstrInfo.cpp | 48 ++++
> lib/Target/R600/SIInstrInfo.h | 26 ++
> 31 files changed, 1229 insertions(+), 113 deletions(-)
> create mode 100644 lib/Target/R600/AMDGPUFrameLowering.cpp
> create mode 100644 lib/Target/R600/AMDGPUFrameLowering.h
> create mode 100644 lib/Target/R600/AMDGPUIndirectAddressing.cpp
> delete mode 100644 lib/Target/R600/AMDILFrameLowering.cpp
> delete mode 100644 lib/Target/R600/AMDILFrameLowering.h
>
> diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
> index 1aa607f..bac01a3 100644
> --- a/lib/Target/R600/AMDGPU.h
> +++ b/lib/Target/R600/AMDGPU.h
> @@ -36,6 +36,7 @@ FunctionPass *createSIInsertWaits(TargetMachine &tm);
> // Passes common to R600 and SI
> Pass *createAMDGPUStructurizeCFGPass();
> FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
> +FunctionPass* createAMDGPUIndirectAddressingPass(TargetMachine &tm);
>
> } // End namespace llvm
>
> diff --git a/lib/Target/R600/AMDGPUFrameLowering.cpp
> b/lib/Target/R600/AMDGPUFrameLowering.cpp
> new file mode 100644
> index 0000000..45b9c9e
> --- /dev/null
> +++ b/lib/Target/R600/AMDGPUFrameLowering.cpp
> @@ -0,0 +1,121 @@
> +//===----------------------- AMDGPUFrameLowering.cpp
> ----------------------===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//==-----------------------------------------------------------------------===//
> +//
> +// Interface to describe a layout of a stack frame on a AMDIL target machine
> +//
> +//===----------------------------------------------------------------------===//
> +#include "AMDGPUFrameLowering.h"
> +#include "AMDGPURegisterInfo.h"
> +#include "R600MachineFunctionInfo.h"
> +#include "llvm/CodeGen/MachineFrameInfo.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/Instructions.h"
> +
> +using namespace llvm;
> +AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
> + int LAO, unsigned TransAl)
> + : TargetFrameLowering(D, StackAl, LAO, TransAl) { }
> +
> +AMDGPUFrameLowering::~AMDGPUFrameLowering() { }
> +
> +unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF)
> const {
> +
> + // XXX: Hardcoding to 1 for now.
> + //
> + // I think the StackWidth should stored as metadata associated with the
> + // MachineFunction. This metadata can either be added by a frontend, or
> + // calculated by a R600 specific LLVM IR pass.
> + //
> + // The StackWidth determines how stack objects are laid out in memory.
> + // For a vector stack variable, like: int4 stack[2], the data will be stored
> + // in the following ways depending on the StackWidth.
> + //
> + // StackWidth = 1:
> + //
> + // T0.X = stack[0].x
> + // T1.X = stack[0].y
> + // T2.X = stack[0].z
> + // T3.X = stack[0].w
> + // T4.X = stack[1].x
> + // T5.X = stack[1].y
> + // T6.X = stack[1].z
> + // T7.X = stack[1].w
> + //
> + // StackWidth = 2:
> + //
> + // T0.X = stack[0].x
> + // T0.Y = stack[0].y
> + // T1.X = stack[0].z
> + // T1.Y = stack[0].w
> + // T2.X = stack[1].x
> + // T2.Y = stack[1].y
> + // T3.X = stack[1].z
> + // T3.Y = stack[1].w
> + //
> + // StackWidth = 4:
> + // T0.X = stack[0].x
> + // T0.Y = stack[0].y
> + // T0.Z = stack[0].z
> + // T0.W = stack[0].w
> + // T1.X = stack[1].x
> + // T1.Y = stack[1].y
> + // T1.Z = stack[1].z
> + // T1.W = stack[1].w
> + return 1;
> +}
> +
> +/// \returns The number of registers allocated for \p FI.
> +int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
> + int FI) const {
> + const MachineFrameInfo *MFI = MF.getFrameInfo();
> + unsigned Offset = 0;
> + int UpperBound = FI == -1 ? MFI->getNumObjects() : FI;
> +
> + for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
> + const AllocaInst *Alloca = MFI->getObjectAllocation(i);
> + unsigned ArrayElements;
> + const Type *AllocaType = Alloca->getAllocatedType();
> + const Type *ElementType;
> +
> + if (AllocaType->isArrayTy()) {
> + ArrayElements = AllocaType->getArrayNumElements();
> + ElementType = AllocaType->getArrayElementType();
> + } else {
> + ArrayElements = 1;
> + ElementType = AllocaType;
> + }
> +
> + unsigned VectorElements;
> + if (ElementType->isVectorTy()) {
> + VectorElements = ElementType->getVectorNumElements();
> + } else {
> + VectorElements = 1;
> + }
> +
> + Offset += (VectorElements / getStackWidth(MF)) * ArrayElements;
> + }
> + return Offset;
> +}
> +
> +const TargetFrameLowering::SpillSlot *
> +AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
> + NumEntries = 0;
> + return 0;
> +}
> +void
> +AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
> +}
> +void
> +AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock
> &MBB) const {
> +}
Formatting: these lines exceed 80 columns — please wrap them at 80 characters.
> +
> +bool
> +AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
> + return false;
> +}
> diff --git a/lib/Target/R600/AMDGPUFrameLowering.h
> b/lib/Target/R600/AMDGPUFrameLowering.h
> new file mode 100644
> index 0000000..cf5742e
> --- /dev/null
> +++ b/lib/Target/R600/AMDGPUFrameLowering.h
> @@ -0,0 +1,44 @@
> +//===--------------------- AMDGPUFrameLowering.h ----------------*- C++
> -*-===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief Interface to describe a layout of a stack frame on a AMDIL
> target
> +/// machine.
> +//
> +//===----------------------------------------------------------------------===//
> +#ifndef AMDILFRAME_LOWERING_H
> +#define AMDILFRAME_LOWERING_H
> +
> +#include "llvm/CodeGen/MachineFunction.h"
> +#include "llvm/Target/TargetFrameLowering.h"
> +
> +namespace llvm {
> +
> +/// \brief Information about the stack frame layout on the AMDGPU targets.
> +///
> +/// It holds the direction of the stack growth, the known stack alignment on
> +/// entry to each function, and the offset to the locals area.
> +/// See TargetFrameInfo for more comments.
> +class AMDGPUFrameLowering : public TargetFrameLowering {
> +public:
> + AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
> + unsigned TransAl = 1);
> + virtual ~AMDGPUFrameLowering();
> +
> + /// \returns The number of 32-bit sub-registers that are used when
> storing
> + /// values to the stack.
> + virtual unsigned getStackWidth(const MachineFunction &MF) const;
> + virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
> + virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries)
> const;
> + virtual void emitPrologue(MachineFunction &MF) const;
> + virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock
> &MBB) const;
> + virtual bool hasFP(const MachineFunction &MF) const;
> +};
> +} // namespace llvm
> +#endif // AMDILFRAME_LOWERING_H
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp
> b/lib/Target/R600/AMDGPUISelLowering.cpp
> index 309bcf5..a000689 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -414,5 +414,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned
> Opcode) const {
> NODE_NAME_CASE(INTERP_P0)
> NODE_NAME_CASE(EXPORT)
> NODE_NAME_CASE(CONST_ADDRESS)
> + NODE_NAME_CASE(REGISTER_LOAD)
> + NODE_NAME_CASE(REGISTER_STORE)
> }
> }
> diff --git a/lib/Target/R600/AMDGPUISelLowering.h
> b/lib/Target/R600/AMDGPUISelLowering.h
> index 9938c65..708c04a 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.h
> +++ b/lib/Target/R600/AMDGPUISelLowering.h
> @@ -124,6 +124,8 @@ enum {
> INTERP_P0,
> EXPORT,
> CONST_ADDRESS,
> + REGISTER_LOAD,
> + REGISTER_STORE,
> LAST_AMDGPU_ISD_NUMBER
> };
>
> diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp
> b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
> new file mode 100644
> index 0000000..4a5438c
> --- /dev/null
> +++ b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
> @@ -0,0 +1,319 @@
> +//===-- AMDGPUIndirectAddressing.cpp - Indirect Addressing Support
> ---------===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +///
> +/// Instructions can use indirect addressing to index the register file as if
> it
> +/// were memory. This pass lowers RegisterLoad and RegisterStore instructions
> +/// to either a COPY or a MOV that uses indirect addressing.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "AMDGPU.h"
> +#include "R600InstrInfo.h"
> +#include "R600MachineFunctionInfo.h"
> +#include "llvm/CodeGen/MachineFunction.h"
> +#include "llvm/CodeGen/MachineFunctionPass.h"
> +#include "llvm/CodeGen/MachineInstrBuilder.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/Support/Debug.h"
> +
> +using namespace llvm;
> +
> +namespace {
> +
> +class AMDGPUIndirectAddressingPass : public MachineFunctionPass {
> +
> +private:
> + static char ID;
> + const AMDGPUInstrInfo *TII;
> +
> + bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const;
> +
> +public:
> + AMDGPUIndirectAddressingPass(TargetMachine &tm) :
> + MachineFunctionPass(ID),
> + TII(static_cast<const AMDGPUInstrInfo*>(tm.getInstrInfo()))
> + { }
> +
> + virtual bool runOnMachineFunction(MachineFunction &MF);
> +
> + const char *getPassName() const { return "R600 Handle indirect
> addressing"; }
> +
> +};
> +
> +} // End anonymous namespace
> +
> +char AMDGPUIndirectAddressingPass::ID = 0;
> +
> +FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) {
> + return new AMDGPUIndirectAddressingPass(tm);
> +}
> +
> +bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction
> &MF) {
> + MachineRegisterInfo &MRI = MF.getRegInfo();
> +
> + unsigned IndirectBegin = TII->getIndirectIndexBegin(MF);
> + unsigned IndirectEnd = TII->getIndirectIndexEnd(MF);
> +
> + // The map keeps track of the indirect address that is represented by
> + // each virtual register. The key is the register and the value is the
> + // indirect address it uses.
> + std::map<unsigned, unsigned> RegisterAddressMap;
> +
> + // First pass - Lower all of the RegisterStore instructions and track which
> + // registers are live.
> + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
> + BB != BB_E; ++BB) {
> + // This map keeps track of the current live indirect registers.
> + // The key is the address and the value is the register
> + std::map<unsigned, unsigned> LiveAddressRegisterMap;
> + MachineBasicBlock &MBB = *BB;
> +
> + for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
> + I != MBB.end(); I = Next) {
> + Next = llvm::next(I);
> + MachineInstr &MI = *I;
> +
> + if (!TII->isRegisterStore(MI)) {
> + continue;
> + }
> +
> + // Lower RegisterStore
> +
> + unsigned RegIndex = MI.getOperand(2).getImm();
> + unsigned Channel = MI.getOperand(3).getImm();
> + unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
> + const TargetRegisterClass *IndirectStoreRegClass =
> +
> TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg());
> +
> + if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
> + // Direct register access.
> + unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
> +
> + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg)
> + .addOperand(MI.getOperand(0));
> +
> + RegisterAddressMap[DstReg] = Address;
> + LiveAddressRegisterMap[Address] = DstReg;
> + } else {
> + // Indirect register access.
> + MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I,
> + MI.getOperand(0).getReg(), // Value
> + Address,
> + MI.getOperand(1).getReg()); //
> Offset
> + for (unsigned i = IndirectBegin; i <= IndirectEnd; ++i) {
> + unsigned Addr = TII->calculateIndirectAddress(i, Channel);
> + unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
> + MOV.addReg(DstReg, RegState::Define | RegState::Implicit);
> + RegisterAddressMap[DstReg] = Addr;
> + LiveAddressRegisterMap[Addr] = DstReg;
> + }
> + }
> + MI.eraseFromParent();
> + }
> +
> +  // Update the live-ins of the successor blocks
> + for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(),
> + SuccEnd = MBB.succ_end();
> + SuccEnd != Succ; ++Succ) {
> + std::map<unsigned, unsigned>::const_iterator Key, KeyEnd;
> + for (Key = LiveAddressRegisterMap.begin(),
> + KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) {
> + (*Succ)->addLiveIn(Key->second);
> + }
> + }
> + }
> +
> + // Second pass - Lower the RegisterLoad instructions
> + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
> + BB != BB_E; ++BB) {
> + // Key is the address and the value is the register
> + std::map<unsigned, unsigned> LiveAddressRegisterMap;
> + MachineBasicBlock &MBB = *BB;
> +
> + MachineBasicBlock::livein_iterator LI = MBB.livein_begin();
> + while (LI != MBB.livein_end()) {
> + std::vector<unsigned> PhiRegisters;
> +
> + // Make sure this live in is used for indirect addressing
> + if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) {
> + ++LI;
> + continue;
> + }
> +
> + unsigned Address = RegisterAddressMap[*LI];
> + LiveAddressRegisterMap[Address] = *LI;
> + PhiRegisters.push_back(*LI);
> +
> + // Check if there are other live in registers which map to the same
> + // indirect address.
> + for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI),
> + LE = MBB.livein_end();
> + LJ != LE; ++LJ) {
> + unsigned Reg = *LJ;
> + if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) {
> + continue;
> + }
> +
> + if (RegisterAddressMap[Reg] == Address) {
> + if (!regHasExplicitDef(MRI, Reg)) {
> + continue;
> + }
> + PhiRegisters.push_back(Reg);
> + }
> + }
> +
> + if (PhiRegisters.size() == 1) {
> + // We don't need to insert a Phi instruction, so we can just add
> the
> + // registers to the live list for the block.
> + LiveAddressRegisterMap[Address] = *LI;
> + MBB.removeLiveIn(*LI);
> + } else {
> + // We need to insert a PHI, because we have the same address being
> + // written in multiple predecessor blocks.
> + const TargetRegisterClass *PhiDstClass =
> +
> TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin()));
> + unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass);
> + MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(),
> + MBB.findDebugLoc(MBB.begin()),
> + TII->get(AMDGPU::PHI), PhiDstReg);
> +
> + for (std::vector<unsigned>::const_iterator RI =
> PhiRegisters.begin(),
> + RE = PhiRegisters.end();
> + RI != RE; ++RI) {
> + unsigned Reg = *RI;
> + MachineInstr *DefInst = MRI.getVRegDef(Reg);
> + assert(DefInst);
> + MachineBasicBlock *RegBlock = DefInst->getParent();
> + Phi.addReg(Reg);
> + Phi.addMBB(RegBlock);
> + MBB.removeLiveIn(Reg);
> + }
> + RegisterAddressMap[PhiDstReg] = Address;
> + LiveAddressRegisterMap[Address] = PhiDstReg;
> + }
> + LI = MBB.livein_begin();
> + }
> +
> + for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
> + I != MBB.end(); I = Next) {
> + Next = llvm::next(I);
> + MachineInstr &MI = *I;
> +
> + if (!TII->isRegisterLoad(MI)) {
> + if (MI.getOpcode() == AMDGPU::PHI) {
> + continue;
> + }
> + // Check for indirect register defs
> + for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands();
> + OpIdx < NumOperands; ++OpIdx) {
> + MachineOperand &MO = MI.getOperand(OpIdx);
> + if (MO.isReg() && MO.isDef() &&
> + RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end())
> {
> + unsigned Reg = MO.getReg();
> + unsigned LiveAddress = RegisterAddressMap[Reg];
> + // Chain the live-ins
> + if (LiveAddressRegisterMap.find(LiveAddress) !=
> RegisterAddressMap.end()) {
Formatting: please wrap this line at 80 characters.
> + MI.addOperand(MachineOperand::CreateReg(
> + LiveAddressRegisterMap[LiveAddress],
> + false, // isDef
> + true, // isImp
> + true)); // isKill
> + }
> + LiveAddressRegisterMap[LiveAddress] = Reg;
> + }
> + }
> + continue;
> + }
> +
> + const TargetRegisterClass *SuperIndirectRegClass =
> +
> TII->getSuperIndirectRegClass();
> + const TargetRegisterClass *IndirectLoadRegClass =
> +
> TII->getIndirectAddrLoadRegClass();
> + unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass);
> +
> + unsigned RegIndex = MI.getOperand(2).getImm();
> + unsigned Channel = MI.getOperand(3).getImm();
> + unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
> +
> + if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
> + // Direct register access
> + unsigned Reg = LiveAddressRegisterMap[Address];
> + unsigned AddrReg = IndirectLoadRegClass->getRegister(Address);
> +
> + if (regHasExplicitDef(MRI, Reg)) {
> + // If the register we are reading from has an explicit def, then that
> + // means it was written via a direct register access (i.e. COPY
> + // or other instruction that doesn't use indirect addressing).
> In
> + // this case we know where the value has been stored, so we can just
> + // issue a copy.
> + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
> + MI.getOperand(0).getReg())
> + .addReg(Reg);
> + } else {
> + // If the register we are reading has an implicit def, then that
> + // means it was written by an indirect register access (i.e. An
> + // instruction that uses indirect addressing.
> + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
> + MI.getOperand(0).getReg())
> + .addReg(AddrReg);
> + }
> + } else {
> + // Indirect register access
> +
> +      // Note on REG_SEQUENCE instructions: You can't actually use the
> register
> + // it defines unless you have an instruction that takes the defined
> + // register class as an operand.
> +
> + MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I),
> +
> TII->get(AMDGPU::REG_SEQUENCE),
> + IndirectReg);
> + for (unsigned i = IndirectBegin; i <= IndirectEnd; ++i) {
> + unsigned Addr = TII->calculateIndirectAddress(i, Channel);
> + if (LiveAddressRegisterMap.find(Addr) ==
> LiveAddressRegisterMap.end()) {
> + continue;
> + }
> + unsigned Reg = LiveAddressRegisterMap[Addr];
> +
> + // We only need to use REG_SEQUENCE for explicit defs, since the
> + // register coalescer won't do anything with the implicit defs.
> + MachineInstr *DefInstr = MRI.getVRegDef(Reg);
> + if (!DefInstr->getOperand(0).isReg() ||
> + DefInstr->getOperand(0).getReg() != Reg) {
> + continue;
> + }
> +
> +        // Insert a REG_SEQUENCE instruction to force the register allocator
> +        // to allocate the virtual register to the correct physical register.
> + Sequence.addReg(LiveAddressRegisterMap[Addr]);
> + Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr));
> + }
> + MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I,
> + MI.getOperand(0).getReg(), // Value
> + Address,
> + MI.getOperand(1).getReg()); //
> Offset
> +
> +
> +
> + Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill);
> +
> + }
> + MI.eraseFromParent();
> + }
> + }
> + return false;
> +}
> +
> +bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo
> &MRI,
> + unsigned Reg) const {
> + MachineInstr *DefInstr = MRI.getVRegDef(Reg);
> + return DefInstr && DefInstr->getOperand(0).isReg() &&
> + DefInstr->getOperand(0).getReg() == Reg;
> +}
> diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp
> b/lib/Target/R600/AMDGPUInstrInfo.cpp
> index e42a46d..640707d 100644
> --- a/lib/Target/R600/AMDGPUInstrInfo.cpp
> +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
> @@ -234,7 +234,16 @@ AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const
> TargetRegisterClass *RC) const {
> // TODO: Implement this function
> return true;
> }
> -
> +
> +bool AMDGPUInstrInfo::isRegisterStore(const MachineInstr &MI) const {
> + return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE;
> +}
> +
> +bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
> + return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
> +}
> +
> +
> void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction
> &MF,
> DebugLoc DL) const {
> MachineRegisterInfo &MRI = MF.getRegInfo();
> diff --git a/lib/Target/R600/AMDGPUInstrInfo.h
> b/lib/Target/R600/AMDGPUInstrInfo.h
> index 32ac691..4ff63aa 100644
> --- a/lib/Target/R600/AMDGPUInstrInfo.h
> +++ b/lib/Target/R600/AMDGPUInstrInfo.h
> @@ -41,9 +41,10 @@ class MachineInstrBuilder;
> class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
> private:
> const AMDGPURegisterInfo RI;
> - TargetMachine &TM;
> bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
> MachineBasicBlock &MBB) const;
> +protected:
> + TargetMachine &TM;
> public:
> explicit AMDGPUInstrInfo(TargetMachine &tm);
>
> @@ -131,12 +132,66 @@ public:
> bool isAExtLoadInst(llvm::MachineInstr *MI) const;
> bool isStoreInst(llvm::MachineInstr *MI) const;
> bool isTruncStoreInst(llvm::MachineInstr *MI) const;
> + bool isRegisterStore(const MachineInstr &MI) const;
> + bool isRegisterLoad(const MachineInstr &MI) const;
> +
> +//===---------------------------------------------------------------------===//
> +// Pure virtual functions to be implemented by sub-classes.
> +//===---------------------------------------------------------------------===//
>
> virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
> int64_t Imm) const = 0;
> virtual unsigned getIEQOpcode() const = 0;
> virtual bool isMov(unsigned opcode) const = 0;
>
> + /// \returns the smallest register index that will be accessed by an
> indirect
> + /// read or write.
> + virtual unsigned getIndirectIndexBegin(const MachineFunction &MF) const =
> 0;
> +
> + /// \returns the largest register index that will be accessed by an
> indirect
> + /// read or write.
> + virtual unsigned getIndirectIndexEnd(const MachineFunction &MF) const =
> 0;
> +
> + /// \brief Calculate the "Indirect Address" for the given
> \p RegIndex and
> + /// \p Channel
> + ///
> +  /// We model indirect addressing using a virtual address space that can be
> +  /// accessed with loads and stores.  The "Indirect Address" is the
> memory
> + /// address in this virtual address space that maps to the given \p
> RegIndex
> + /// and \p Channel.
> + virtual unsigned calculateIndirectAddress(unsigned RegIndex,
> + unsigned Channel) const = 0;
> +
> + /// \returns The register class to be used for storing values to an
> + /// "Indirect Address" .
> + virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
> + unsigned SourceReg) const =
> 0;
> +
> + /// \returns The register class to be used for loading values from
> + /// an "Indirect Address" .
> + virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0;
> +
> + /// \brief Build instruction(s) for an indirect register write.
> + ///
> + /// \returns The instruction that performs the indirect register write
> + virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
> + MachineBasicBlock::iterator I,
> + unsigned ValueReg, unsigned Address,
> + unsigned OffsetReg) const = 0;
> +
> + /// \brief Build instruction(s) for an indirect register read.
> + ///
> + /// \returns The instruction that performs the indirect register read
> + virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
> + MachineBasicBlock::iterator I,
> + unsigned ValueReg, unsigned Address,
> + unsigned OffsetReg) const = 0;
> +
> + /// \returns the register class whose sub registers are the set of all
> + /// possible registers that can be used for indirect addressing.
> + virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0;
> +
> +
> /// \brief Convert the AMDIL MachineInstr to a supported ISA
> /// MachineInstr
> virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
> @@ -146,4 +201,7 @@ public:
>
> } // End llvm namespace
>
> +#define AMDGPU_FLAG_REGISTER_LOAD (1UL << 63)
> +#define AMDGPU_FLAG_REGISTER_STORE (1UL << 62)
> +
> #endif // AMDGPUINSTRINFO_H
> diff --git a/lib/Target/R600/AMDGPUInstrInfo.td
> b/lib/Target/R600/AMDGPUInstrInfo.td
> index 96368e8..b66ae87 100644
> --- a/lib/Target/R600/AMDGPUInstrInfo.td
> +++ b/lib/Target/R600/AMDGPUInstrInfo.td
> @@ -72,3 +72,11 @@ def AMDGPUumin : SDNode<"AMDGPUISD::UMIN",
> SDTIntBinOp,
> def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
>
> def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
> +
> +def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
> + SDTypeProfile<1, 2, [SDTCisPtrTy<1>,
> SDTCisInt<2>]>,
> + [SDNPHasChain, SDNPMayLoad]>;
> +
> +def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
> + SDTypeProfile<0, 3, [SDTCisPtrTy<1>,
> SDTCisInt<2>]>,
> + [SDNPHasChain, SDNPMayStore]>;
> diff --git a/lib/Target/R600/AMDGPUInstructions.td
> b/lib/Target/R600/AMDGPUInstructions.td
> index e634d20..3dee004 100644
> --- a/lib/Target/R600/AMDGPUInstructions.td
> +++ b/lib/Target/R600/AMDGPUInstructions.td
> @@ -13,8 +13,8 @@
> //===----------------------------------------------------------------------===//
>
> class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern>
> : Instruction {
> - field bits<16> AMDILOp = 0;
> - field bits<3> Gen = 0;
> + field bit isRegisterLoad = 0;
> + field bit isRegisterStore = 0;
>
> let Namespace = "AMDGPU";
> let OutOperandList = outs;
> @@ -22,8 +22,9 @@ class AMDGPUInst <dag outs, dag ins, string asm,
> list<dag> pattern> : Instructio
> let AsmString = asm;
> let Pattern = pattern;
> let Itinerary = NullALU;
> - let TSFlags{42-40} = Gen;
> - let TSFlags{63-48} = AMDILOp;
> +
> + let TSFlags{63} = isRegisterLoad;
> + let TSFlags{62} = isRegisterStore;
> }
>
> class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag>
> pattern>
> @@ -101,7 +102,9 @@ def FP_ONE : PatLeaf <
> [{return N->isExactlyValue(1.0);}]
>> ;
>
> -let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
> +let isCodeGenOnly = 1, isPseudo = 1 in {
> +
> +let usesCustomInserter = 1 in {
>
> class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
> (outs rc:$dst),
> @@ -131,7 +134,31 @@ def SHADER_TYPE : AMDGPUShaderInst <
> [(int_AMDGPU_shader_type imm:$type)]
>> ;
>
> -} // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1
> +} // usesCustomInserter = 1
> +
> +multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
> + ComplexPattern addrPat> {
> + def RegisterLoad : AMDGPUShaderInst <
> + (outs dstClass:$dst),
> + (ins addrClass:$addr, i32imm:$chan),
> + "RegisterLoad $dst, $addr",
> + [(set (i32 dstClass:$dst), (AMDGPUregister_load addrPat:$addr,
> + (i32 timm:$chan)))]
> + > {
> + let isRegisterLoad = 1;
> + }
> +
> + def RegisterStore : AMDGPUShaderInst <
> + (outs),
> + (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
> + "RegisterStore $val, $addr",
> + [(AMDGPUregister_store (i32 dstClass:$val), addrPat:$addr, (i32
> timm:$chan))]
> + > {
> + let isRegisterStore = 1;
> + }
> +}
> +
> +} // End isCodeGenOnly = 1, isPseudo = 1
>
> /* Generic helper patterns for intrinsics */
> /* -------------------------------------- */
> diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp
> b/lib/Target/R600/AMDGPURegisterInfo.cpp
> index eeafec8..19f89da 100644
> --- a/lib/Target/R600/AMDGPURegisterInfo.cpp
> +++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
> @@ -47,5 +47,76 @@ unsigned AMDGPURegisterInfo::getFrameRegister(const
> MachineFunction &MF) const {
> return 0;
> }
>
> +unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
> +
> + switch(IndirectIndex) {
> + case 0: return AMDGPU::indirect_0;
> + case 1: return AMDGPU::indirect_1;
> + case 2: return AMDGPU::indirect_2;
> + case 3: return AMDGPU::indirect_3;
> + case 4: return AMDGPU::indirect_4;
> + case 5: return AMDGPU::indirect_5;
> + case 6: return AMDGPU::indirect_6;
> + case 7: return AMDGPU::indirect_7;
> + case 8: return AMDGPU::indirect_8;
> + case 9: return AMDGPU::indirect_9;
> + case 10: return AMDGPU::indirect_10;
> + case 11: return AMDGPU::indirect_11;
> + case 12: return AMDGPU::indirect_12;
> + case 13: return AMDGPU::indirect_13;
> + case 14: return AMDGPU::indirect_14;
> + case 15: return AMDGPU::indirect_15;
> + case 16: return AMDGPU::indirect_16;
> + case 17: return AMDGPU::indirect_17;
> + case 18: return AMDGPU::indirect_18;
> + case 19: return AMDGPU::indirect_19;
> + case 20: return AMDGPU::indirect_20;
> + case 21: return AMDGPU::indirect_21;
> + case 22: return AMDGPU::indirect_22;
> + case 23: return AMDGPU::indirect_23;
> + case 24: return AMDGPU::indirect_24;
> + case 25: return AMDGPU::indirect_25;
> + case 26: return AMDGPU::indirect_26;
> + case 27: return AMDGPU::indirect_27;
> + case 28: return AMDGPU::indirect_28;
> + case 29: return AMDGPU::indirect_29;
> + case 30: return AMDGPU::indirect_30;
> + case 31: return AMDGPU::indirect_31;
> + case 32: return AMDGPU::indirect_32;
> + case 33: return AMDGPU::indirect_33;
> + case 34: return AMDGPU::indirect_34;
> + case 35: return AMDGPU::indirect_35;
> + case 36: return AMDGPU::indirect_36;
> + case 37: return AMDGPU::indirect_37;
> + case 38: return AMDGPU::indirect_38;
> + case 39: return AMDGPU::indirect_39;
> + case 40: return AMDGPU::indirect_40;
> + case 41: return AMDGPU::indirect_41;
> + case 42: return AMDGPU::indirect_42;
> + case 43: return AMDGPU::indirect_43;
> + case 44: return AMDGPU::indirect_44;
> + case 45: return AMDGPU::indirect_45;
> + case 46: return AMDGPU::indirect_46;
> + case 47: return AMDGPU::indirect_47;
> + case 48: return AMDGPU::indirect_48;
> + case 49: return AMDGPU::indirect_49;
> + case 50: return AMDGPU::indirect_50;
> + case 51: return AMDGPU::indirect_51;
> + case 52: return AMDGPU::indirect_52;
> + case 53: return AMDGPU::indirect_53;
> + case 54: return AMDGPU::indirect_54;
> + case 55: return AMDGPU::indirect_55;
> + case 56: return AMDGPU::indirect_56;
> + case 57: return AMDGPU::indirect_57;
> + case 58: return AMDGPU::indirect_58;
> + case 59: return AMDGPU::indirect_59;
> + case 60: return AMDGPU::indirect_60;
> + case 61: return AMDGPU::indirect_61;
> + case 62: return AMDGPU::indirect_62;
> + case 63: return AMDGPU::indirect_63;
> + default: llvm_unreachable("indirect index out of range");
> + }
> +}
> +
> #define GET_REGINFO_TARGET_DESC
> #include "AMDGPUGenRegisterInfo.inc"
> diff --git a/lib/Target/R600/AMDGPURegisterInfo.h
> b/lib/Target/R600/AMDGPURegisterInfo.h
> index 76ee7ae..5007ff5 100644
> --- a/lib/Target/R600/AMDGPURegisterInfo.h
> +++ b/lib/Target/R600/AMDGPURegisterInfo.h
> @@ -56,6 +56,8 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
> RegScavenger *RS) const;
> unsigned getFrameRegister(const MachineFunction &MF) const;
>
> + unsigned getIndirectSubReg(unsigned IndirectIndex) const;
> +
> };
>
> } // End namespace llvm
> diff --git a/lib/Target/R600/AMDGPURegisterInfo.td
> b/lib/Target/R600/AMDGPURegisterInfo.td
> index 8181e02..8c427fc 100644
> --- a/lib/Target/R600/AMDGPURegisterInfo.td
> +++ b/lib/Target/R600/AMDGPURegisterInfo.td
> @@ -16,6 +16,14 @@ let Namespace = "AMDGPU" in {
> def sel_y : SubRegIndex;
> def sel_z : SubRegIndex;
> def sel_w : SubRegIndex;
> +
> +
> +foreach Index = 0-63 in {
> + def indirect_#Index : SubRegIndex;
> +}
> +
> +def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
> +
> }
>
> include "R600RegisterInfo.td"
> diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp
> b/lib/Target/R600/AMDGPUTargetMachine.cpp
> index 7b069e7..dab3497 100644
> --- a/lib/Target/R600/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
> @@ -102,6 +102,12 @@ AMDGPUPassConfig::addPreISel() {
> bool AMDGPUPassConfig::addInstSelector() {
> addPass(createAMDGPUPeepholeOpt(*TM));
> addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
> +
> + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
> + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
> + // The callbacks this pass uses are not implemented yet on SI.
> + addPass(createAMDGPUIndirectAddressingPass(*TM));
> + }
> return false;
> }
>
> diff --git a/lib/Target/R600/AMDGPUTargetMachine.h
> b/lib/Target/R600/AMDGPUTargetMachine.h
> index 399e55c..5a1dcf4 100644
> --- a/lib/Target/R600/AMDGPUTargetMachine.h
> +++ b/lib/Target/R600/AMDGPUTargetMachine.h
> @@ -15,9 +15,9 @@
> #ifndef AMDGPU_TARGET_MACHINE_H
> #define AMDGPU_TARGET_MACHINE_H
>
> +#include "AMDGPUFrameLowering.h"
> #include "AMDGPUInstrInfo.h"
> #include "AMDGPUSubtarget.h"
> -#include "AMDILFrameLowering.h"
> #include "AMDILIntrinsicInfo.h"
> #include "R600ISelLowering.h"
> #include "llvm/ADT/OwningPtr.h"
> diff --git a/lib/Target/R600/AMDILFrameLowering.cpp
> b/lib/Target/R600/AMDILFrameLowering.cpp
> deleted file mode 100644
> index 9ad495a..0000000
> --- a/lib/Target/R600/AMDILFrameLowering.cpp
> +++ /dev/null
> @@ -1,47 +0,0 @@
> -//===----------------------- AMDILFrameLowering.cpp -----------------*- C++
> -*-===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//==-----------------------------------------------------------------------===//
> -//
> -/// \file
> -/// \brief Interface to describe a layout of a stack frame on a AMDGPU
> target
> -/// machine.
> -//
> -//===----------------------------------------------------------------------===//
> -#include "AMDILFrameLowering.h"
> -#include "llvm/CodeGen/MachineFrameInfo.h"
> -
> -using namespace llvm;
> -AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
> - int LAO, unsigned TransAl)
> - : TargetFrameLowering(D, StackAl, LAO, TransAl) {
> -}
> -
> -AMDGPUFrameLowering::~AMDGPUFrameLowering() {
> -}
> -
> -int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
> - int FI) const {
> - const MachineFrameInfo *MFI = MF.getFrameInfo();
> - return MFI->getObjectOffset(FI);
> -}
> -
> -const TargetFrameLowering::SpillSlot *
> -AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
> - NumEntries = 0;
> - return 0;
> -}
> -void
> -AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
> -}
> -void
> -AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock
> &MBB) const {
> -}
> -bool
> -AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
> - return false;
> -}
> diff --git a/lib/Target/R600/AMDILFrameLowering.h
> b/lib/Target/R600/AMDILFrameLowering.h
> deleted file mode 100644
> index 51337c3..0000000
> --- a/lib/Target/R600/AMDILFrameLowering.h
> +++ /dev/null
> @@ -1,40 +0,0 @@
> -//===--------------------- AMDILFrameLowering.h -----------------*- C++
> -*-===//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -/// \file
> -/// \brief Interface to describe a layout of a stack frame on a AMDIL
> target
> -/// machine.
> -//
> -//===----------------------------------------------------------------------===//
> -#ifndef AMDILFRAME_LOWERING_H
> -#define AMDILFRAME_LOWERING_H
> -
> -#include "llvm/CodeGen/MachineFunction.h"
> -#include "llvm/Target/TargetFrameLowering.h"
> -
> -namespace llvm {
> -
> -/// \brief Information about the stack frame layout on the AMDGPU targets.
> -///
> -/// It holds the direction of the stack growth, the known stack alignment on
> -/// entry to each function, and the offset to the locals area.
> -/// See TargetFrameInfo for more comments.
> -class AMDGPUFrameLowering : public TargetFrameLowering {
> -public:
> - AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
> - unsigned TransAl = 1);
> - virtual ~AMDGPUFrameLowering();
> - virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
> - virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries)
> const;
> - virtual void emitPrologue(MachineFunction &MF) const;
> - virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock
> &MBB) const;
> - virtual bool hasFP(const MachineFunction &MF) const;
> -};
> -} // namespace llvm
> -#endif // AMDILFRAME_LOWERING_H
> diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp
> b/lib/Target/R600/AMDILISelDAGToDAG.cpp
> index 567b3e2..3f3e7e2 100644
> --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
> +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
> @@ -75,6 +75,7 @@ private:
> bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue&
> Offset);
> bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
> bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue
> &Offset);
> + bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue
> &Offset);
>
> // Include the pieces autogenerated from the target description.
> #include "AMDGPUGenDAGISel.inc"
> @@ -161,16 +162,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
> }
> switch (Opc) {
> default: break;
> - case ISD::FrameIndex: {
> - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
> - unsigned int FI = FIN->getIndex();
> - EVT OpVT = N->getValueType(0);
> - unsigned int NewOpc = AMDGPU::COPY;
> - SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
> - return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
> - }
> - break;
> - }
> case ISD::ConstantFP:
> case ISD::Constant: {
> const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
> @@ -565,3 +556,22 @@ bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr,
> SDValue& Base,
>
> return true;
> }
> +
> +bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
> + SDValue &Offset) {
> + ConstantSDNode *C;
> +
> + if ((C = dyn_cast<ConstantSDNode>(Addr))) {
> + Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
> + Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
> + } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR)
> &&
> + (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
> + Base = Addr.getOperand(0);
> + Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
> + } else {
> + Base = Addr;
> + Offset = CurDAG->getTargetConstant(0, MVT::i32);
> + }
> +
> + return true;
> +}
> diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
> index c49304f..8ef9f8c 100644
> --- a/lib/Target/R600/CMakeLists.txt
> +++ b/lib/Target/R600/CMakeLists.txt
> @@ -25,6 +25,7 @@ add_llvm_target(AMDGPUCodeGen
> AMDILPeepholeOptimizer.cpp
> AMDILSIDevice.cpp
> AMDGPUAsmPrinter.cpp
> + AMDGPUIndirectAddressing.cpp
> AMDGPUMCInstLower.cpp
> AMDGPUSubtarget.cpp
> AMDGPUTargetMachine.cpp
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> index e76c6c8..fb17ab7 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> @@ -105,10 +105,7 @@ void AMDGPUInstPrinter::printOMOD(const MCInst *MI,
> unsigned OpNo,
>
> void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
> raw_ostream &O) {
> - const MCOperand &Op = MI->getOperand(OpNo);
> - if (Op.getImm() != 0) {
> - O << " + " << Op.getImm();
> - }
> + printIfSet(MI, OpNo, O, "+");
> }
>
> void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
> diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h
> index e19eea3..16cfcf5 100644
> --- a/lib/Target/R600/R600Defines.h
> +++ b/lib/Target/R600/R600Defines.h
> @@ -49,6 +49,9 @@ namespace R600_InstFlag {
> #define HW_REG_MASK 0x1ff
> #define HW_CHAN_SHIFT 9
>
> +#define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT)
> +#define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK)
> +
> namespace R600Operands {
> enum Ops {
> DST,
> diff --git a/lib/Target/R600/R600ISelLowering.cpp
> b/lib/Target/R600/R600ISelLowering.cpp
> index 773122b..aad71d4 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -18,6 +18,7 @@
> #include "R600MachineFunctionInfo.h"
> #include "llvm/Argument.h"
> #include "llvm/Function.h"
> +#include "llvm/CodeGen/MachineFrameInfo.h"
> #include "llvm/CodeGen/MachineInstrBuilder.h"
> #include "llvm/CodeGen/MachineRegisterInfo.h"
> #include "llvm/CodeGen/SelectionDAG.h"
> @@ -71,11 +72,22 @@ R600TargetLowering::R600TargetLowering(TargetMachine
> &TM) :
> setOperationAction(ISD::SELECT, MVT::i32, Custom);
> setOperationAction(ISD::SELECT, MVT::f32, Custom);
>
> + // Legalize loads and stores to the private address space.
> + setOperationAction(ISD::LOAD, MVT::i32, Custom);
> + setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
> + setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
> + setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
> + setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
> + setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
> + setOperationAction(ISD::STORE, MVT::i8, Custom);
> setOperationAction(ISD::STORE, MVT::i32, Custom);
> + setOperationAction(ISD::STORE, MVT::v2i32, Custom);
> setOperationAction(ISD::STORE, MVT::v4i32, Custom);
>
> setOperationAction(ISD::LOAD, MVT::i32, Custom);
> setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
> + setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
> +
> setTargetDAGCombine(ISD::FP_ROUND);
> setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
>
> @@ -376,6 +388,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op,
> SelectionDAG &DAG) const
> case ISD::STORE: return LowerSTORE(Op, DAG);
> case ISD::LOAD: return LowerLOAD(Op, DAG);
> case ISD::FPOW: return LowerFPOW(Op, DAG);
> + case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
> case ISD::INTRINSIC_VOID: {
> SDValue Chain = Op.getOperand(0);
> unsigned IntrinsicID =
> @@ -516,6 +529,10 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
> DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
> return;
> }
> + case ISD::STORE:
> + SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
> + Results.push_back(SDValue(Node, 0));
> + return;
> }
> }
>
> @@ -583,6 +600,20 @@ SDValue
> R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
> false, false, false, 0);
> }
>
> +SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG)
> const {
> +
> + MachineFunction &MF = DAG.getMachineFunction();
> + const AMDGPUFrameLowering *TFL =
> + static_cast<const
> AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
> +
> + FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
> + assert(FIN);
> +
> + unsigned FrameIndex = FIN->getIndex();
> + unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
> + return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
> +}
> +
> SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
> DebugLoc DL = Op.getDebugLoc();
> EVT VT = Op.getValueType();
> @@ -797,6 +828,61 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op,
> SelectionDAG &DAG) const {
> return Cond;
> }
>
> +/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
> +/// convert these pointers to a register index. Each register holds
> +/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
> +/// \p StackWidth, which tells us how many of the 4 sub-registers will be
> used
> +/// for indirect addressing.
> +SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
> + unsigned StackWidth,
> + SelectionDAG &DAG) const {
> + unsigned SRLPad;
> + switch(StackWidth) {
> + case 1:
> + SRLPad = 2;
> + break;
> + case 2:
> + SRLPad = 3;
> + break;
> + case 4:
> + SRLPad = 4;
> + break;
> + default: llvm_unreachable("Invalid stack width");
> + }
> +
> + return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
> + DAG.getConstant(SRLPad, MVT::i32));
> +}
> +
> +void R600TargetLowering::getStackAddress(unsigned StackWidth,
> + unsigned ElemIdx,
> + unsigned &Channel,
> + unsigned &PtrIncr) const {
> + switch (StackWidth) {
> + default:
> + case 1:
> + Channel = 0;
> + if (ElemIdx > 0) {
> + PtrIncr = 1;
> + } else {
> + PtrIncr = 0;
> + }
> + break;
> + case 2:
> + Channel = ElemIdx % 2;
> + if (ElemIdx == 2) {
> + PtrIncr = 1;
> + } else {
> + PtrIncr = 0;
> + }
> + break;
> + case 4:
> + Channel = ElemIdx;
> + PtrIncr = 0;
> + break;
> + }
> +}
> +
> SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const
> {
> DebugLoc DL = Op.getDebugLoc();
> StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
> @@ -818,7 +904,52 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op,
> SelectionDAG &DAG) const {
> }
> return Chain;
> }
> - return SDValue();
> +
> + EVT ValueVT = Value.getValueType();
> +
> + if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
> + return SDValue();
> + }
> +
> + // Lowering for indirect addressing
> +
> + const MachineFunction &MF = DAG.getMachineFunction();
> + const AMDGPUFrameLowering *TFL = static_cast<const
> AMDGPUFrameLowering*>(
> +
> getTargetMachine().getFrameLowering());
> + unsigned StackWidth = TFL->getStackWidth(MF);
> +
> + Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
> +
> + if (ValueVT.isVector()) {
> + unsigned NumElemVT = ValueVT.getVectorNumElements();
> + EVT ElemVT = ValueVT.getVectorElementType();
> + SDValue Stores[4];
> +
> + assert(NumElemVT >= StackWidth && "Stack width cannot be
> greater than "
> + "vector width in load");
> +
> + for (unsigned i = 0; i < NumElemVT; ++i) {
> + unsigned Channel, PtrIncr;
> + getStackAddress(StackWidth, i, Channel, PtrIncr);
> + Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
> + DAG.getConstant(PtrIncr, MVT::i32));
> + SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
> + Value, DAG.getConstant(i, MVT::i32));
> +
> + Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
> + Chain, Elem, Ptr,
> + DAG.getTargetConstant(Channel, MVT::i32));
> + }
> + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
> + } else {
> + if (ValueVT == MVT::i8) {
> + Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
> + }
> + Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain,
> Value, Ptr,
> + DAG.getTargetConstant(0, MVT::i32)); // Channel
> + }
> +
> + return Chain;
> }
>
> // return (512 + (kc_bank << 12)
> @@ -907,7 +1038,53 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op,
> SelectionDAG &DAG) const
> return DAG.getMergeValues(MergedValues, 2, DL);
> }
>
> - return SDValue();
> + if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
> + return SDValue();
> + }
> +
> + // Lowering for indirect addressing
> + const MachineFunction &MF = DAG.getMachineFunction();
> + const AMDGPUFrameLowering *TFL = static_cast<const
> AMDGPUFrameLowering*>(
> +
> getTargetMachine().getFrameLowering());
> + unsigned StackWidth = TFL->getStackWidth(MF);
> +
> + Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
> +
> + if (VT.isVector()) {
> + unsigned NumElemVT = VT.getVectorNumElements();
> + EVT ElemVT = VT.getVectorElementType();
> + SDValue Loads[4];
> +
> + assert(NumElemVT >= StackWidth && "Stack width cannot be
> greater than "
> + "vector width in load");
> +
> + for (unsigned i = 0; i < NumElemVT; ++i) {
> + unsigned Channel, PtrIncr;
> + getStackAddress(StackWidth, i, Channel, PtrIncr);
> + Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
> + DAG.getConstant(PtrIncr, MVT::i32));
> + Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
> + Chain, Ptr,
> + DAG.getTargetConstant(Channel, MVT::i32),
> + Op.getOperand(2));
> + }
> + for (unsigned i = NumElemVT; i < 4; ++i) {
> + Loads[i] = DAG.getUNDEF(ElemVT);
> + }
> + EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
> + LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
> + } else {
> + LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
> + Chain, Ptr,
> + DAG.getTargetConstant(0, MVT::i32), // Channel
> + Op.getOperand(2));
> + }
> +
> + SDValue Ops[2];
> + Ops[0] = LoweredLoad;
> + Ops[1] = Chain;
> +
> + return DAG.getMergeValues(Ops, 2, DL);
> }
>
> SDValue R600TargetLowering::LowerFPOW(SDValue Op,
> diff --git a/lib/Target/R600/R600ISelLowering.h
> b/lib/Target/R600/R600ISelLowering.h
> index c141d50..afa3897 100644
> --- a/lib/Target/R600/R600ISelLowering.h
> +++ b/lib/Target/R600/R600ISelLowering.h
> @@ -64,7 +64,12 @@ private:
> SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
> -
> + SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
> +
> + SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
> + SelectionDAG &DAG) const;
> + void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
> + unsigned &Channel, unsigned &PtrIncr) const;
> bool isZero(SDValue Op) const;
> };
>
> diff --git a/lib/Target/R600/R600InstrInfo.cpp
> b/lib/Target/R600/R600InstrInfo.cpp
> index f7daaf8..0fefbab 100644
> --- a/lib/Target/R600/R600InstrInfo.cpp
> +++ b/lib/Target/R600/R600InstrInfo.cpp
> @@ -16,8 +16,12 @@
> #include "AMDGPUTargetMachine.h"
> #include "AMDGPUSubtarget.h"
> #include "R600Defines.h"
> +#include "R600MachineFunctionInfo.h"
> #include "R600RegisterInfo.h"
> #include "llvm/CodeGen/MachineInstrBuilder.h"
> +#include "llvm/CodeGen/MachineFrameInfo.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/Instructions.h"
>
> #define GET_INSTRINFO_CTOR
> #include "AMDGPUGenDFAPacketizer.inc"
> @@ -464,6 +468,124 @@ unsigned int R600InstrInfo::getInstrLatency(const
> InstrItineraryData *ItinData,
> return 2;
> }
>
> +unsigned R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF)
> const {
> + const MachineRegisterInfo &MRI = MF.getRegInfo();
> + const R600MachineFunctionInfo *MFI =
> MF.getInfo<R600MachineFunctionInfo>();
> + unsigned Offset = 0;
> +
> + if (MRI.livein_empty() && MFI->ReservedRegs.empty()) {
> + return 0;
> + }
> +
> + for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
> + LE = MRI.livein_end();
> + LI != LE; ++LI) {
> + Offset = std::max(Offset,
> +
> (unsigned)GET_REG_INDEX(RI.getEncodingValue(LI->first)));
> + }
> +
> + for (std::vector<unsigned>::const_iterator RRI =
> MFI->ReservedRegs.begin(),
> + RRE = MFI->ReservedRegs.end();
> + RRI != RRE; ++RRI) {
> + Offset = std::max(Offset,
> + (unsigned GET_REG_INDEX(RI.getEncodingValue(*RRI))));
> + }
> +
> + return Offset + 1;
> +}
> +
> +unsigned R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF)
> const {
> + unsigned Offset = 0;
> + const MachineFrameInfo *MFI = MF.getFrameInfo();
> +
> + // Variable sized objects are not supported
> + assert(!MFI->hasVarSizedObjects());
> +
> + // Only one stack object is supported at the moment
> +// assert(MFI->getNumObjects() <= 1);
> +
> + if (MFI->getNumObjects() == 0) {
> + return 0;
> + }
> +
> + Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
> +
> + return getIndirectIndexBegin(MF) + Offset;
> +}
> +
> +std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
> + const MachineFunction &MF)
> const {
> + const AMDGPUFrameLowering *TFL =
> + static_cast<const
> AMDGPUFrameLowering*>(TM.getFrameLowering());
> + unsigned StackWidth = TFL->getStackWidth(MF);
> + unsigned End = getIndirectIndexEnd(MF);
> +
> + std::vector<unsigned> Regs;
> +
> + for (unsigned Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
> + unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
> + Regs.push_back(SuperReg);
> + for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
> + unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) +
> Chan);
> + Regs.push_back(Reg);
> + }
> + }
> + return Regs;
> +}
> +
> +unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
> + unsigned Channel) const {
> + return (4 * RegIndex) + Channel;
> +}
> +
> +const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
> + unsigned SourceReg) const
> {
> + return &AMDGPU::R600_TReg32RegClass;
> +}
> +
> +const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
> + return &AMDGPU::TRegMemRegClass;
> +}
> +
> +MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
> + MachineBasicBlock::iterator I,
> + unsigned ValueReg, unsigned Address,
> + unsigned OffsetReg) const {
> + unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
> + MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
> + AMDGPU::AR_X, OffsetReg);
> + setImmOperand(MOVA, R600Operands::WRITE, 0);
> +
> + MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
> + AddrReg, ValueReg)
> + .addReg(AMDGPU::AR_X,
> RegState::Implicit);
> + setImmOperand(Mov, R600Operands::DST_REL, 1);
> + return Mov;
> +}
> +
> +MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
> + MachineBasicBlock::iterator I,
> + unsigned ValueReg, unsigned Address,
> + unsigned OffsetReg) const {
> + unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
> + MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
> + AMDGPU::AR_X,
> + OffsetReg);
> + setImmOperand(MOVA, R600Operands::WRITE, 0);
> + MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
> + ValueReg,
> + AddrReg)
> + .addReg(AMDGPU::AR_X,
> RegState::Implicit);
> + setImmOperand(Mov, R600Operands::SRC0_REL, 1);
> +
> + return Mov;
> +}
> +
> +const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
> + return &AMDGPU::IndirectRegRegClass;
> +}
> +
> +
> MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock
> &MBB,
> MachineBasicBlock::iterator
> I,
> unsigned Opcode,
> diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
> index 6bb0ca9..a50d18e 100644
> --- a/lib/Target/R600/R600InstrInfo.h
> +++ b/lib/Target/R600/R600InstrInfo.h
> @@ -114,6 +114,38 @@ namespace llvm {
> virtual int getInstrLatency(const InstrItineraryData *ItinData,
> SDNode *Node) const { return 1;}
>
> + /// \returns a list of all the registers that may be accessed using
> indirect
> + /// addressing.
> + std::vector<unsigned> getIndirectReservedRegs(const MachineFunction
> &MF) const;
> +
> + virtual unsigned getIndirectIndexBegin(const MachineFunction &MF) const;
> +
> + virtual unsigned getIndirectIndexEnd(const MachineFunction &MF) const;
> +
> +
> + virtual unsigned calculateIndirectAddress(unsigned RegIndex,
> + unsigned Channel) const;
> +
> + virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
> + unsigned SourceReg)
> const;
> +
> + virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
> +
> + virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
> + MachineBasicBlock::iterator I,
> + unsigned ValueReg, unsigned Address,
> + unsigned OffsetReg) const;
> +
> + virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
> + MachineBasicBlock::iterator I,
> + unsigned ValueReg, unsigned Address,
> + unsigned OffsetReg) const;
> +
> + virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
> +
> +
> + /// buildDefaultInstruction - This function returns a MachineInstr with
> + /// all the instruction modifiers initialized to their default values.
> /// You can use this function to avoid manually specifying each instruction
> /// modifier operand when building a new instruction.
> ///
> diff --git a/lib/Target/R600/R600Instructions.td
> b/lib/Target/R600/R600Instructions.td
> index 04b83bc..a6a0213 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -91,11 +91,16 @@ def UP : InstFlag <"printUpdatePred">;
> // default to 0.
> def LAST : InstFlag<"printLast", 1>;
>
> +def FRAMEri : Operand<iPTR> {
> + let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
> +}
> +
> def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [],
> []>;
> def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [],
> []>;
> def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [],
> []>;
> def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1,
> "SelectGlobalValueConstantOffset", [], []>;
> def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2,
> "SelectGlobalValueVariableOffset", [], []>;
> +def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect",
> [], []>;
>
> class R600ALU_Word0 {
> field bits<32> Word0;
> @@ -1221,6 +1226,10 @@ let Predicates = [isEGorCayman] in {
> defm DOT4_eg : DOT4_Common<0xBE>;
> defm CUBE_eg : CUBE_Common<0xC0>;
>
> +let hasSideEffects = 1 in {
> + def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
> +}
> +
> def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg,
> EXP_IEEE_eg>;
>
> def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
> @@ -1471,6 +1480,12 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
>
> }
>
> +//===----------------------------------------------------------------------===//
> +// Register loads and stores - for indirect addressing
> +//===----------------------------------------------------------------------===//
> +
> +defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
> +
> let Predicates = [isCayman] in {
>
> let isVector = 1 in {
> diff --git a/lib/Target/R600/R600MachineFunctionInfo.h
> b/lib/Target/R600/R600MachineFunctionInfo.h
> index 91f9de2..ebb2584 100644
> --- a/lib/Target/R600/R600MachineFunctionInfo.h
> +++ b/lib/Target/R600/R600MachineFunctionInfo.h
> @@ -13,6 +13,7 @@
> #ifndef R600MACHINEFUNCTIONINFO_H
> #define R600MACHINEFUNCTIONINFO_H
>
> +#include "llvm/ADT/BitVector.h"
> #include "llvm/CodeGen/MachineFunction.h"
> #include "llvm/CodeGen/SelectionDAG.h"
> #include <vector>
> @@ -24,6 +25,7 @@ class R600MachineFunctionInfo : public MachineFunctionInfo {
> public:
> R600MachineFunctionInfo(const MachineFunction &MF);
> std::vector<unsigned> ReservedRegs;
> + std::vector<unsigned> IndirectRegs;
> SDNode *Outputs[16];
> bool HasLinearInterpolation;
> bool HasPerspectiveInterpolation;
> diff --git a/lib/Target/R600/R600RegisterInfo.cpp
> b/lib/Target/R600/R600RegisterInfo.cpp
> index 0441e4a..ecd0403 100644
> --- a/lib/Target/R600/R600RegisterInfo.cpp
> +++ b/lib/Target/R600/R600RegisterInfo.cpp
> @@ -15,6 +15,7 @@
> #include "R600RegisterInfo.h"
> #include "AMDGPUTargetMachine.h"
> #include "R600Defines.h"
> +#include "R600InstrInfo.h"
> #include "R600MachineFunctionInfo.h"
>
> using namespace llvm;
> @@ -44,11 +45,23 @@ BitVector R600RegisterInfo::getReservedRegs(const
> MachineFunction &MF) const {
> Reserved.set(AMDGPU::PRED_SEL_ZERO);
> Reserved.set(AMDGPU::PRED_SEL_ONE);
>
> + for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
> + E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
> + Reserved.set(*I);
> + }
> +
> for (std::vector<unsigned>::const_iterator I =
> MFI->ReservedRegs.begin(),
> E = MFI->ReservedRegs.end(); I != E;
> ++I) {
> Reserved.set(*I);
> }
>
> + const R600InstrInfo *RII = static_cast<const R600InstrInfo*>(&TII);
> + std::vector<unsigned> IndirectRegs =
> RII->getIndirectReservedRegs(MF);
> + for (std::vector<unsigned>::iterator I = IndirectRegs.begin(),
> + E = IndirectRegs.end();
> + I != E; ++I) {
> + Reserved.set(*I);
> + }
> return Reserved;
> }
>
> @@ -83,3 +96,4 @@ unsigned R600RegisterInfo::getSubRegFromChannel(unsigned
> Channel) const {
> case 3: return AMDGPU::sel_w;
> }
> }
> +
> diff --git a/lib/Target/R600/R600RegisterInfo.td
> b/lib/Target/R600/R600RegisterInfo.td
> index 993fefc..e119e7a 100644
> --- a/lib/Target/R600/R600RegisterInfo.td
> +++ b/lib/Target/R600/R600RegisterInfo.td
> @@ -27,6 +27,12 @@ foreach Index = 0-127 in {
> foreach Chan = [ "X", "Y", "Z", "W" ]
> in {
> // 32-bit Temporary Registers
> def T#Index#_#Chan : R600RegWithChan
> <"T"#Index#"."#Chan, Index, Chan>;
> +
> + // Indirect addressing offset registers
> + def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" +
> AR.x)."#Chan,
> + Index, Chan>;
> + def TRegMem#Index#_#Chan : R600RegWithChan
> <"T"#Index#"."#Chan, Index,
> + Chan>;
> }
> // 128-bit Temporary Registers
> def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
> @@ -57,6 +63,7 @@ def PREDICATE_BIT : R600Reg<"PredicateBit",
> 0>;
> def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
> def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
> def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
> +def AR_X : R600Reg<"AR.x", 0>;
>
> def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
> (add (sequence "ArrayBase%u", 448,
> 464))>;
> @@ -66,6 +73,17 @@ def ALU_CONST : R600Reg<"CBuf", 0>;
> // interpolation param reference, SRCx_SEL contains index
> def ALU_PARAM : R600Reg<"Param", 0>;
>
> +let isAllocatable = 0 in {
> +
> +def R600_Addr : RegisterClass <"AMDGPU", [i32], 127,
> + (add (interleave
> + (interleave (sequence "Addr%u_X",
> 0, 127),
> + (sequence "Addr%u_Z",
> 0, 127)),
> + (interleave (sequence "Addr%u_Y",
> 0, 127),
> + (sequence "Addr%u_W",
> 0, 127))))>;
> +
> +} // End isAllocatable = 0
> +
> def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
> (add (sequence "T%u_X", 0,
> 127))>;
>
> @@ -85,6 +103,7 @@ def R600_TReg32 : RegisterClass <"AMDGPU", [f32,
> i32], 32,
> def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
> R600_TReg32,
> R600_ArrayBase,
> + R600_Addr,
> ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
> ALU_CONST, ALU_PARAM
> )>;
> @@ -99,3 +118,57 @@ def R600_Reg128 : RegisterClass<"AMDGPU",
> [v4f32, v4i32], 128,
> (add (sequence "T%u_XYZW", 0,
> 127))> {
> let CopyCost = -1;
> }
> +
> +//===----------------------------------------------------------------------===//
> +// Register classes for indirect addressing
> +//===----------------------------------------------------------------------===//
> +
> +// Super register for all the Indirect Registers. This register class is used
> +// by the REG_SEQUENCE instruction to specify the registers to use for direct
> +// reads / writes which may be written / read by an indirect address.
> +class IndirectSuper<string n, list<Register> subregs> :
> + RegisterWithSubRegs<n, subregs> {
> + let Namespace = "AMDGPU";
> + let SubRegIndices =
> + [indirect_0,indirect_1,indirect_2,indirect_3,indirect_4,indirect_5,indirect_6,
> + indirect_7,indirect_8,indirect_9,indirect_10,indirect_11,indirect_12,
> + indirect_13,indirect_14,indirect_15,indirect_16,indirect_17,indirect_18,
> + indirect_19,indirect_20,indirect_21,indirect_22,indirect_23,indirect_24,
> + indirect_25,indirect_26,indirect_27,indirect_28,indirect_29,indirect_30,
> + indirect_31,indirect_32,indirect_33,indirect_34,indirect_35,indirect_36,
> + indirect_37,indirect_38,indirect_39,indirect_40,indirect_41,indirect_42,
> + indirect_43,indirect_44,indirect_45,indirect_46,indirect_47,indirect_48,
> + indirect_49,indirect_50,indirect_51,indirect_52,indirect_53,indirect_54,
> + indirect_55,indirect_56,indirect_57,indirect_58,indirect_59,indirect_60,
> + indirect_61,indirect_62,indirect_63];
> +
> +}
> +
> +def IndirectSuperReg : IndirectSuper<"Indirect",
> + [TRegMem0_X,TRegMem0_Y,TRegMem0_Z,TRegMem0_W,
> + TRegMem1_X,TRegMem1_Y,TRegMem1_Z,TRegMem1_W,
> + TRegMem2_X,TRegMem2_Y,TRegMem2_Z,TRegMem2_W,
> + TRegMem3_X,TRegMem3_Y,TRegMem3_Z,TRegMem3_W,
> + TRegMem4_X,TRegMem4_Y,TRegMem4_Z,TRegMem4_W,
> + TRegMem5_X,TRegMem5_Y,TRegMem5_Z,TRegMem5_W,
> + TRegMem6_X,TRegMem6_Y,TRegMem6_Z,TRegMem6_W,
> + TRegMem7_X,TRegMem7_Y,TRegMem7_Z,TRegMem7_W,
> + TRegMem8_X,TRegMem8_Y,TRegMem8_Z,TRegMem8_W,
> + TRegMem9_X,TRegMem9_Y,TRegMem9_Z,TRegMem9_W,
> + TRegMem10_X,TRegMem10_Y,TRegMem10_Z,TRegMem10_W,
> + TRegMem11_X,TRegMem11_Y,TRegMem11_Z,TRegMem11_W,
> + TRegMem12_X,TRegMem12_Y,TRegMem12_Z,TRegMem12_W,
> + TRegMem13_X,TRegMem13_Y,TRegMem13_Z,TRegMem13_W,
> + TRegMem14_X,TRegMem14_Y,TRegMem14_Z,TRegMem14_W,
> + TRegMem15_X,TRegMem15_Y,TRegMem15_Z,TRegMem15_W]
> +>;
> +
> +def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add
> IndirectSuperReg)>;
> +
> +// This register class defines the registers that are the storage units for
> +// the "Indirect Addressing" pseudo memory space.
> +def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32,
> + (add (interleave
> + (interleave (sequence "TRegMem%u_X", 0, 16), (sequence
> "TRegMem%u_Z", 0, 16)),
> + (interleave (sequence "TRegMem%u_Y", 0, 16), (sequence
> "TRegMem%u_W", 0, 16))))
> +>;
Please wrap these lines at 80 characters, per the LLVM coding standards.
> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index 4aa4a45..f10fd1f 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -87,3 +87,51 @@ bool
> SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
> return RC != &AMDGPU::EXECRegRegClass;
> }
> +
> +//===----------------------------------------------------------------------===//
> +// Indirect addressing callbacks
> +//===----------------------------------------------------------------------===//
> +
> +unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
> + unsigned Channel) const {
> + assert(Channel == 0);
> + return RegIndex;
> +}
> +
> +
> +unsigned SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF)
> const {
> + llvm_unreachable("Unimplemented");
> +}
> +
> +unsigned SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const
> {
> + llvm_unreachable("Unimplemented");
> +}
> +
> +const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass(
> + unsigned SourceReg) const
> {
> + llvm_unreachable("Unimplemented");
> +}
> +
> +const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const {
> + llvm_unreachable("Unimplemented");
> +}
> +
> +MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
> + MachineBasicBlock *MBB,
> + MachineBasicBlock::iterator I,
> + unsigned ValueReg,
> + unsigned Address, unsigned OffsetReg) const
> {
> + llvm_unreachable("Unimplemented");
> +}
> +
> +MachineInstrBuilder SIInstrInfo::buildIndirectRead(
> + MachineBasicBlock *MBB,
> + MachineBasicBlock::iterator I,
> + unsigned ValueReg,
> + unsigned Address, unsigned OffsetReg) const
> {
> + llvm_unreachable("Unimplemented");
> +}
> +
> +const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const {
> + llvm_unreachable("Unimplemented");
> +}
> diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
> index 783cd9f..606ba77 100644
> --- a/lib/Target/R600/SIInstrInfo.h
> +++ b/lib/Target/R600/SIInstrInfo.h
> @@ -48,6 +48,32 @@ public:
> virtual bool isMov(unsigned Opcode) const;
>
> virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
> +
> + virtual unsigned getIndirectIndexBegin(const MachineFunction &MF) const;
> +
> + virtual unsigned getIndirectIndexEnd(const MachineFunction &MF) const;
> +
> + virtual unsigned calculateIndirectAddress(unsigned RegIndex,
> + unsigned Channel) const;
> +
> + virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
> + unsigned SourceReg)
> const;
> +
> + virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
> +
> + virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
> + MachineBasicBlock::iterator I,
> + unsigned ValueReg,
> + unsigned Address,
> + unsigned OffsetReg) const;
> +
> + virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
> + MachineBasicBlock::iterator I,
> + unsigned ValueReg,
> + unsigned Address,
> + unsigned OffsetReg) const;
> +
> + virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
> };
>
> } // End namespace llvm
> --
> 1.7.11.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
More information about the mesa-dev
mailing list