[Mesa-dev] [PATCH] R600/SI: Make sure M0 is always initialized when DS instructions are used

Tom Stellard tom at stellard.net
Thu Feb 6 09:40:32 PST 2014


From: Tom Stellard <thomas.stellard at amd.com>

DS instructions that access local memory can only uses addresses that
are less than or equal to the value of M0.  When M0 is uninitialized,
then we experience undefined behavior.
---
 lib/Target/R600/SIInstrInfo.cpp        | 12 ++++++++++++
 lib/Target/R600/SIInstrInfo.h          |  1 +
 lib/Target/R600/SIInstrInfo.td         |  8 ++++++++
 lib/Target/R600/SILowerControlFlow.cpp | 10 ++++------
 test/CodeGen/R600/load.ll              | 19 +++++++++++++++++--
 5 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 68292af..a239fb9 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -313,6 +313,18 @@ SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
   return RC != &AMDGPU::EXECRegRegClass;
 }
 
+namespace llvm {
+namespace AMDGPU {
+// Helper function generated by tablegen.  We are wrapping this with
+// an SIInstrInfo function that reutrns bool rather than int.
+int isDS(uint16_t Opcode);
+}
+}
+
+bool SIInstrInfo::isDS(uint16_t Opcode) const {
+  return ::AMDGPU::isDS(Opcode) != -1;
+}
+
 int SIInstrInfo::isMIMG(uint16_t Opcode) const {
   return get(Opcode).TSFlags & SIInstrFlags::MIMG;
 }
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 19bcf0c..de3ff07 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -70,6 +70,7 @@ public:
   virtual bool isMov(unsigned Opcode) const;
 
   virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+  bool isDS(uint16_t Opcode) const;
   int isMIMG(uint16_t Opcode) const;
   int isSMRD(uint16_t Opcode) const;
   bool isVOP1(uint16_t Opcode) const;
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 988ac37..cb2358b 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -628,4 +628,12 @@ def getCommuteOrig : InstrMapping {
   let ValueCols = [["1"]];
 }
 
+def isDS : InstrMapping {
+  let FilterClass = "DS";
+  let RowFields = ["Inst"];
+  let ColFields = ["Size"];
+  let KeyCol = ["8"];
+  let ValueCols = [["8"]];
+}
+
 include "SIInstructions.td"
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index 8c12e13..91323d8 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -67,7 +67,7 @@ private:
 
   static char ID;
   const TargetRegisterInfo *TRI;
-  const TargetInstrInfo *TII;
+  const SIInstrInfo *TII;
 
   bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
 
@@ -408,7 +408,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
 }
 
 bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
-  TII = MF.getTarget().getInstrInfo();
+  TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
   TRI = MF.getTarget().getRegisterInfo();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 
@@ -426,6 +426,8 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
 
       Next = llvm::next(I);
       MachineInstr &MI = *I;
+      NeedM0 |= TII->isDS(MI.getOpcode());
+
       switch (MI.getOpcode()) {
         default: break;
         case AMDGPU::SI_IF:
@@ -488,10 +490,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
 
         case AMDGPU::DS_READ_B32:
           NeedWQM = true;
-          // Fall through
-        case AMDGPU::DS_WRITE_B32:
-        case AMDGPU::DS_ADD_U32_RTN:
-          NeedM0 = true;
           break;
 
         case AMDGPU::V_INTERP_P1_F32:
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
index e4492d7..0153524 100644
--- a/test/CodeGen/R600/load.ll
+++ b/test/CodeGen/R600/load.ll
@@ -445,6 +445,7 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(
 ; R600-CHECK: LDS_UBYTE_READ_RET
 ; SI-CHECK-LABEL: @load_i8_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_U8
 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
   %1 = load i8 addrspace(3)* %in
@@ -458,6 +459,7 @@ define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
 ; R600-CHECK: ASHR
 ; SI-CHECK-LABEL: @load_i8_sext_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_I8
 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
 entry:
@@ -472,6 +474,7 @@ entry:
 ; R600-CHECK: LDS_UBYTE_READ_RET
 ; SI-CHECK-LABEL: @load_v2i8_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_U8
 ; SI-CHECK: DS_READ_U8
 define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
@@ -489,6 +492,7 @@ entry:
 ; R600-CHECK-DAG: ASHR
 ; SI-CHECK-LABEL: @load_v2i8_sext_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_I8
 ; SI-CHECK: DS_READ_I8
 define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
@@ -506,6 +510,7 @@ entry:
 ; R600-CHECK: LDS_UBYTE_READ_RET
 ; SI-CHECK-LABEL: @load_v4i8_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_U8
 ; SI-CHECK: DS_READ_U8
 ; SI-CHECK: DS_READ_U8
@@ -529,6 +534,7 @@ entry:
 ; R600-CHECK-DAG: ASHR
 ; SI-CHECK-LABEL: @load_v4i8_sext_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_I8
 ; SI-CHECK: DS_READ_I8
 ; SI-CHECK: DS_READ_I8
@@ -546,6 +552,7 @@ entry:
 ; R600-CHECK: LDS_USHORT_READ_RET
 ; SI-CHECK-LABEL: @load_i16_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_U16
 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
 entry:
@@ -560,6 +567,7 @@ entry:
 ; R600-CHECK: ASHR
 ; SI-CHECK-LABEL: @load_i16_sext_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_I16
 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
 entry:
@@ -574,6 +582,7 @@ entry:
 ; R600-CHECK: LDS_USHORT_READ_RET
 ; SI-CHECK-LABEL: @load_v2i16_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_U16
 ; SI-CHECK: DS_READ_U16
 define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
@@ -591,6 +600,7 @@ entry:
 ; R600-CHECK-DAG: ASHR
 ; SI-CHECK-LABEL: @load_v2i16_sext_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_I16
 ; SI-CHECK: DS_READ_I16
 define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
@@ -608,6 +618,7 @@ entry:
 ; R600-CHECK: LDS_USHORT_READ_RET
 ; SI-CHECK-LABEL: @load_v4i16_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_U16
 ; SI-CHECK: DS_READ_U16
 ; SI-CHECK: DS_READ_U16
@@ -631,6 +642,7 @@ entry:
 ; R600-CHECK-DAG: ASHR
 ; SI-CHECK-LABEL: @load_v4i16_sext_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_I16
 ; SI-CHECK: DS_READ_I16
 ; SI-CHECK: DS_READ_I16
@@ -643,11 +655,12 @@ entry:
   ret void
 }
 
-; load an i32 value from the glocal address space.
+; load an i32 value from the local address space.
 ; R600-CHECK-LABEL: @load_i32_local
 ; R600-CHECK: LDS_READ_RET
 ; SI-CHECK-LABEL: @load_i32_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_B32
 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
@@ -656,10 +669,11 @@ entry:
   ret void
 }
 
-; load a f32 value from the global address space.
+; load a f32 value from the local address space.
 ; R600-CHECK-LABEL: @load_f32_local
 ; R600-CHECK: LDS_READ_RET
 ; SI-CHECK-LABEL: @load_f32_local
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_B32
 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
 entry:
@@ -673,6 +687,7 @@ entry:
 ; R600-CHECK: LDS_READ_RET
 ; R600-CHECK: LDS_READ_RET
 ; SI-CHECK-LABEL: @load_v2f32_local
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_B32
 ; SI-CHECK: DS_READ_B32
 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
-- 
1.8.1.5



More information about the mesa-dev mailing list