[Mesa-dev] [PATCH] R600/SI: Make sure M0 is always initialized when DS instructions are used
Tom Stellard
tom at stellard.net
Thu Feb 6 09:40:32 PST 2014
From: Tom Stellard <thomas.stellard at amd.com>
DS instructions that access local memory can only uses addresses that
are less than or equal to the value of M0. When M0 is uninitialized,
then we experience undefined behavior.
---
lib/Target/R600/SIInstrInfo.cpp | 12 ++++++++++++
lib/Target/R600/SIInstrInfo.h | 1 +
lib/Target/R600/SIInstrInfo.td | 8 ++++++++
lib/Target/R600/SILowerControlFlow.cpp | 10 ++++------
test/CodeGen/R600/load.ll | 19 +++++++++++++++++--
5 files changed, 42 insertions(+), 8 deletions(-)
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 68292af..a239fb9 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -313,6 +313,18 @@ SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
return RC != &AMDGPU::EXECRegRegClass;
}
+namespace llvm {
+namespace AMDGPU {
+// Helper function generated by tablegen. We are wrapping this with
+// an SIInstrInfo function that reutrns bool rather than int.
+int isDS(uint16_t Opcode);
+}
+}
+
+bool SIInstrInfo::isDS(uint16_t Opcode) const {
+ return ::AMDGPU::isDS(Opcode) != -1;
+}
+
int SIInstrInfo::isMIMG(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::MIMG;
}
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 19bcf0c..de3ff07 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -70,6 +70,7 @@ public:
virtual bool isMov(unsigned Opcode) const;
virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+ bool isDS(uint16_t Opcode) const;
int isMIMG(uint16_t Opcode) const;
int isSMRD(uint16_t Opcode) const;
bool isVOP1(uint16_t Opcode) const;
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 988ac37..cb2358b 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -628,4 +628,12 @@ def getCommuteOrig : InstrMapping {
let ValueCols = [["1"]];
}
+def isDS : InstrMapping {
+ let FilterClass = "DS";
+ let RowFields = ["Inst"];
+ let ColFields = ["Size"];
+ let KeyCol = ["8"];
+ let ValueCols = [["8"]];
+}
+
include "SIInstructions.td"
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index 8c12e13..91323d8 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -67,7 +67,7 @@ private:
static char ID;
const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
+ const SIInstrInfo *TII;
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
@@ -408,7 +408,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
}
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
- TII = MF.getTarget().getInstrInfo();
+ TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
TRI = MF.getTarget().getRegisterInfo();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -426,6 +426,8 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
Next = llvm::next(I);
MachineInstr &MI = *I;
+ NeedM0 |= TII->isDS(MI.getOpcode());
+
switch (MI.getOpcode()) {
default: break;
case AMDGPU::SI_IF:
@@ -488,10 +490,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::DS_READ_B32:
NeedWQM = true;
- // Fall through
- case AMDGPU::DS_WRITE_B32:
- case AMDGPU::DS_ADD_U32_RTN:
- NeedM0 = true;
break;
case AMDGPU::V_INTERP_P1_F32:
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
index e4492d7..0153524 100644
--- a/test/CodeGen/R600/load.ll
+++ b/test/CodeGen/R600/load.ll
@@ -445,6 +445,7 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-LABEL: @load_i8_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
%1 = load i8 addrspace(3)* %in
@@ -458,6 +459,7 @@ define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
; R600-CHECK: ASHR
; SI-CHECK-LABEL: @load_i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
@@ -472,6 +474,7 @@ entry:
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-LABEL: @load_v2i8_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
@@ -489,6 +492,7 @@ entry:
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v2i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
@@ -506,6 +510,7 @@ entry:
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-LABEL: @load_v4i8_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
@@ -529,6 +534,7 @@ entry:
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v4i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
@@ -546,6 +552,7 @@ entry:
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-LABEL: @load_i16_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
@@ -560,6 +567,7 @@ entry:
; R600-CHECK: ASHR
; SI-CHECK-LABEL: @load_i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
@@ -574,6 +582,7 @@ entry:
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-LABEL: @load_v2i16_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
@@ -591,6 +600,7 @@ entry:
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v2i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
@@ -608,6 +618,7 @@ entry:
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-LABEL: @load_v4i16_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
@@ -631,6 +642,7 @@ entry:
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v4i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
@@ -643,11 +655,12 @@ entry:
ret void
}
-; load an i32 value from the glocal address space.
+; load an i32 value from the local address space.
; R600-CHECK-LABEL: @load_i32_local
; R600-CHECK: LDS_READ_RET
; SI-CHECK-LABEL: @load_i32_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
@@ -656,10 +669,11 @@ entry:
ret void
}
-; load a f32 value from the global address space.
+; load a f32 value from the local address space.
; R600-CHECK-LABEL: @load_f32_local
; R600-CHECK: LDS_READ_RET
; SI-CHECK-LABEL: @load_f32_local
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
@@ -673,6 +687,7 @@ entry:
; R600-CHECK: LDS_READ_RET
; R600-CHECK: LDS_READ_RET
; SI-CHECK-LABEL: @load_v2f32_local
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
; SI-CHECK: DS_READ_B32
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
--
1.8.1.5
More information about the mesa-dev
mailing list