[Mesa-dev] radeonsi: LLVM r187139 broke some piglit tests

Tom Stellard tom at stellard.net
Thu Aug 15 09:16:03 PDT 2013


On Thu, Aug 15, 2013 at 08:22:39AM -0700, Tom Stellard wrote:
> On Thu, Aug 15, 2013 at 11:55:36AM +0200, Michel Dänzer wrote:
> > On Fre, 2013-08-02 at 17:58 +0200, Michel Dänzer wrote:
> > > On Mit, 2013-07-31 at 08:42 -0700, Tom Stellard wrote:
> > > > On Wed, Jul 31, 2013 at 01:04:01PM +0200, Michel Dänzer wrote:
> > > > > 
> > > > > LLVM revision 187139 ('Allocate local registers in order for optimal
> > > > > coloring.') broke some derivative related piglit tests with the radeonsi
> > > > > driver. 
> > > > > 
> > > > > I'm attaching a diff between the bad and good generated code (as printed
> > > > > with RADEON_DUMP_SHADERS=1) for the glsl-derivs test. The only
> > > > > difference I can see is in which registers are used in which order.
> > > > > 
> > > > > I wonder if we might be missing S_WAITCNT after DS_READ/WRITE
> > > > > instructions in some cases, but I haven't spotted any candidates for
> > > > > that in the bad code which aren't there in the good code as well. Can
> > > > > anyone else spot something I've missed?
> > > > 
> > > > Shouldn't we be using the S_BARRIER instruction to keep the threads in sync?
> > > 
> > > Doesn't seem to help unfortunately, but thanks for the good suggestion.
> > 
> > I found one thing going wrong: DS_WRITE_B32 ends up using a VGPR
> > register number instead of the $gds operand for encoding the GDS field
> > (the asm output from llc even shows the VGPR name). If the VGPR number
> > happens to be odd (i.e. to have the least significant bit set), the
> > shader ends up writing to GDS instead of LDS.
> >
> 
> Ouch, that's a pretty bad bug.
> 
> > But I have no idea why this is happening, or how to fix it. :(
> > 
> > 
> 
> I can take a look at it.
>

The attached patch should fix the problem, can you test?

-Tom
-------------- next part --------------
>From c837737e2807b523fffac2ddf9aa9d8f293cf622 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard at amd.com>
Date: Thu, 15 Aug 2013 09:03:08 -0700
Subject: [PATCH] R600/SI: Fix incorrect encoding of DS_WRITE_B32 instructions

The SIInsertWaits pass was overwriting the first operand (gds bit) of
DS_WRITE_B32 with the second operand (value to write).  This meant that
any time the value to write was stored in an odd number VGPR, the gds
bit would be set causing the instruction to write to GDS instead of LDS.
---
 lib/Target/R600/SIInsertWaits.cpp | 4 +---
 lib/Target/R600/SIInstrInfo.td    | 8 ++++----
 lib/Target/R600/SIInstructions.td | 6 +++---
 test/CodeGen/R600/local-memory.ll | 4 ++--
 4 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index ba202e3..c477be5 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -134,9 +134,7 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
   // LGKM may uses larger values
   if (TSFlags & SIInstrFlags::LGKM_CNT) {
 
-    MachineOperand &Op = MI.getOperand(0);
-    if (!Op.isReg())
-      Op = MI.getOperand(1);
+    const MachineOperand &Op = MI.getOperand(0);
     assert(Op.isReg() && "First LGKM operand must be a register!");
 
     unsigned Reg = Op.getReg();
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index ecc4718..1965ba0 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -342,8 +342,8 @@ class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
 class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
   op,
   (outs regClass:$vdst),
-  (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1,
-       i8imm:$offset0, i8imm:$offset1),
+  (ins VReg_32:$addr, VReg_32:$data0, VReg_32:$data1,
+       i8imm:$offset0, i8imm:$offset1, i1imm:$gds),
   asm#" $vdst, $gds, $addr, $data0, $data1, $offset0, $offset1, [M0]",
   []> {
   let mayLoad = 1;
@@ -353,8 +353,8 @@ class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
 class DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
   op,
   (outs),
-  (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1,
-       i8imm:$offset0, i8imm:$offset1),
+  (ins VReg_32:$addr, VReg_32:$data0, VReg_32:$data1,
+       i8imm:$offset0, i8imm:$offset1, i1imm:$gds),
   asm#" $gds, $addr, $data0, $data1, $offset0, $offset1, [M0]",
   []> {
   let mayStore = 1;
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 4eb3566..9856fa6 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1745,13 +1745,13 @@ def : Pat <
 
 def : Pat <
     (local_load i64:$src0),
-    (i32 (DS_READ_B32 0, (EXTRACT_SUBREG $src0, sub0),
-                      (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0))
+    (i32 (DS_READ_B32 (EXTRACT_SUBREG $src0, sub0),
+                      (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0, 0))
 >;
 
 def : Pat <
     (local_store i32:$src1, i64:$src0),
-    (DS_WRITE_B32 0, (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0)
+    (DS_WRITE_B32 (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0, 0)
 >;
 
 /********** ================== **********/
diff --git a/test/CodeGen/R600/local-memory.ll b/test/CodeGen/R600/local-memory.ll
index 5458fb9..ca322ab 100644
--- a/test/CodeGen/R600/local-memory.ll
+++ b/test/CodeGen/R600/local-memory.ll
@@ -13,7 +13,7 @@
 ; SI-CHECK-NEXT: .long 32768
 
 ; EG-CHECK: LDS_WRITE
-; SI-CHECK: DS_WRITE_B32
+; SI-CHECK: DS_WRITE_B32 0
 
 ; GROUP_BARRIER must be the last instruction in a clause
 ; EG-CHECK: GROUP_BARRIER
@@ -21,7 +21,7 @@
 ; SI-CHECK: S_BARRIER
 
 ; EG-CHECK: LDS_READ_RET
-; SI-CHECK: DS_READ_B32
+; SI-CHECK: DS_READ_B32 VGPR{{[0-9]}}, 0
 
 define void @local_memory(i32 addrspace(1)* %out) {
 entry:
-- 
1.7.11.4



More information about the mesa-dev mailing list