[Mesa-dev] [PATCH] R600/SI: Implement spilling of SGPRs v4
Tom Stellard
tom at stellard.net
Mon Nov 25 20:32:06 PST 2013
From: Tom Stellard <thomas.stellard at amd.com>
SGPRs are spilled into VGPRs using the {READ,WRITE}LANE_B32 instructions.
v2:
- Fix encoding of Lane Mask
- Use correct register flags, so we don't overwrite the low dword
when restoring multi-dword registers.
v3:
- Register spilling seems to hang the GPU, so replace all shaders
that need spilling with a dummy shader.
v4:
- Fix *LANE definitions
- Change destination reg class for 32-bit SMRD instructions
https://bugs.freedesktop.org/show_bug.cgi?id=71285
NOTE: This is a candidate for the 3.4 branch.
---
lib/Target/R600/AMDGPUInstrInfo.h | 20 ++++-----
lib/Target/R600/SIInstrInfo.cpp | 75 +++++++++++++++++++++++++++++++
lib/Target/R600/SIInstrInfo.h | 12 +++++
lib/Target/R600/SIInstructions.td | 24 ++++++++--
lib/Target/R600/SIMachineFunctionInfo.cpp | 35 ++++++++++++++-
lib/Target/R600/SIMachineFunctionInfo.h | 28 ++++++++++++
lib/Target/R600/SIRegisterInfo.cpp | 19 ++++++++
lib/Target/R600/SIRegisterInfo.h | 1 +
8 files changed, 199 insertions(+), 15 deletions(-)
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index ce5b58c..426910c 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -78,18 +78,18 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const = 0;
- void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
- void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
protected:
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index c05d278..b3df0d6 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -16,6 +16,7 @@
#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -185,6 +186,80 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
return Opcode;
}
+void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ unsigned KillFlag = isKill ? RegState::Kill : 0;
+
+ if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
+ if (RI.allUsesCanBeCopiedToVGPR(MRI, MI->getOperand(0).getReg())) {
+ unsigned VReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ BuildMI(MBB, MI, DL, get(AMDGPU::COPY), VReg)
+ .addReg(SrcReg, KillFlag);
+ MFI->SpillTracker.addSpilledReg(FrameIndex, VReg);
+ } else {
+ unsigned Lane = MFI->SpillTracker.getNextLane(MRI);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
+ MFI->SpillTracker.LaneVGPR)
+ .addReg(SrcReg, KillFlag)
+ .addImm(Lane);
+ MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
+ Lane);
+ }
+ } else {
+ for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
+ unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), get(AMDGPU::COPY), SubReg)
+ .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
+ storeRegToStackSlot(MBB, MI, SubReg, isKill, FrameIndex + i,
+ &AMDGPU::SReg_32RegClass, TRI);
+ }
+ }
+}
+
+void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) {
+ SIMachineFunctionInfo::SpilledReg Spill =
+ MFI->SpillTracker.getSpilledReg(FrameIndex);
+ assert(Spill.VGPR);
+ if (!Spill.hasLane()) {
+ MRI.setRegClass(DestReg, MRI.getRegClass(Spill.VGPR));
+ BuildMI(MBB, MI, DL, get(AMDGPU::COPY), DestReg)
+ .addReg(Spill.VGPR);
+ } else {
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg)
+ .addReg(Spill.VGPR)
+ .addImm(Spill.Lane);
+ }
+ } else {
+ for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
+ unsigned Flags = RegState::Define;
+ if (i == 0) {
+ Flags |= RegState::Undef;
+ }
+ unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ loadRegFromStackSlot(MBB, MI, SubReg, FrameIndex + i,
+ &AMDGPU::SReg_32RegClass, TRI);
+ BuildMI(MBB, MI, DL, get(AMDGPU::COPY))
+ .addReg(DestReg, Flags, RI.getSubRegFromChannel(i))
+ .addReg(SubReg);
+ }
+ }
+}
+
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
bool NewMI) const {
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 4daec4c..d09d153 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -43,6 +43,18 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
unsigned commuteOpcode(unsigned Opcode) const;
virtual MachineInstr *commuteInstruction(MachineInstr *MI,
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 6e61e69..b114ca5 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -489,14 +489,17 @@ def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_F
let mayLoad = 1 in {
-defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>;
+// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
+// SMRD instructions, because the SGPR_32 register class does not include M0
+// and writing to M0 from an SMRD instruction will hang the GPU.
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SGPR_32>;
defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
- 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32
+ 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SGPR_32
>;
defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
@@ -873,8 +876,21 @@ def : Pat <
$src2), sub1)
>;
-defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
-defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;
+def V_READLANE_B32 : VOP2 <
+ 0x00000001,
+ (outs SReg_32:$vdst),
+ (ins VReg_32:$src0, SSrc_32:$vsrc1),
+ "V_READLANE_B32 $vdst, $src0, $vsrc1",
+ []
+>;
+
+def V_WRITELANE_B32 : VOP2 <
+ 0x00000002,
+ (outs VReg_32:$vdst),
+ (ins SReg_32:$src0, SSrc_32:$vsrc1),
+ "V_WRITELANE_B32 $vdst, $src0, $vsrc1",
+ []
+>;
let isCommutable = 1 in {
defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
index 071f9fa..ea04346 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -10,6 +10,10 @@
#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define MAX_LANES 64
using namespace llvm;
@@ -19,4 +23,33 @@ void SIMachineFunctionInfo::anchor() {}
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
- PSInputAddr(0) { }
+ PSInputAddr(0),
+ SpillTracker() { }
+
+static unsigned createLaneVGPR(MachineRegisterInfo &MRI) {
+ return MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+}
+
+unsigned SIMachineFunctionInfo::RegSpillTracker::getNextLane(MachineRegisterInfo &MRI) {
+ if (!LaneVGPR) {
+ LaneVGPR = createLaneVGPR(MRI);
+ } else {
+ CurrentLane++;
+ if (CurrentLane == MAX_LANES) {
+ CurrentLane = 0;
+ LaneVGPR = createLaneVGPR(MRI);
+ }
+ }
+ return CurrentLane;
+}
+
+void SIMachineFunctionInfo::RegSpillTracker::addSpilledReg(unsigned FrameIndex,
+ unsigned Reg,
+ int Lane) {
+ SpilledRegisters[FrameIndex] = SpilledReg(Reg, Lane);
+}
+
+const SIMachineFunctionInfo::SpilledReg&
+SIMachineFunctionInfo::RegSpillTracker::getSpilledReg(unsigned FrameIndex) {
+ return SpilledRegisters[FrameIndex];
+}
diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
index 2f1961c..8dc82a0 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.h
+++ b/lib/Target/R600/SIMachineFunctionInfo.h
@@ -16,16 +16,44 @@
#define SIMACHINEFUNCTIONINFO_H_
#include "AMDGPUMachineFunction.h"
+#include <map>
namespace llvm {
+class MachineRegisterInfo;
+
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo : public AMDGPUMachineFunction {
virtual void anchor();
public:
+
+ struct SpilledReg {
+ unsigned VGPR;
+ int Lane;
+ SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
+ SpilledReg() : VGPR(0), Lane(-1) { }
+ bool hasLane() { return Lane != -1;}
+ };
+
+ struct RegSpillTracker {
+ private:
+ unsigned CurrentLane;
+ std::map<unsigned, SpilledReg> SpilledRegisters;
+ public:
+ unsigned LaneVGPR;
+ RegSpillTracker() : CurrentLane(0), SpilledRegisters(), LaneVGPR(0) { }
+ unsigned getNextLane(MachineRegisterInfo &MRI);
+ void addSpilledReg(unsigned FrameIndex, unsigned Reg, int Lane = -1);
+ const SpilledReg& getSpilledReg(unsigned FrameIndex);
+ bool programSpillsRegisters() { return !SpilledRegisters.empty(); }
+ };
+
+ // SIMachineFunctionInfo definition
+
SIMachineFunctionInfo(const MachineFunction &MF);
unsigned PSInputAddr;
+ struct RegSpillTracker SpillTracker;
};
} // End namespace llvm
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index ed0bbaf..d4973bd 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -16,6 +16,7 @@
#include "SIRegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
@@ -129,3 +130,21 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
return &AMDGPU::VGPR_32RegClass;
}
}
+
+bool SIRegisterInfo::allUsesCanBeCopiedToVGPR(const MachineRegisterInfo &MRI,
+ unsigned Reg) const {
+ if (MRI.use_empty(Reg)) {
+ return false;
+ }
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(TM.getInstrInfo());
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
+ E = MachineRegisterInfo::use_end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ if (!TII->canReadVGPR(MI, I.getOperandNo())) {
+ return false;
+ }
+ }
+ return true;
+}
+
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index 8148f7f..369c7a0 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -63,6 +63,7 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
/// be returned.
const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC,
unsigned SubIdx) const;
+ bool allUsesCanBeCopiedToVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
};
} // End namespace llvm
--
1.8.1.5
More information about the mesa-dev
mailing list