[Mesa-dev] R600/SI Patches: A few cleanups for compute

Thu May 16 10:13:05 PDT 2013

On Thu, May 16, 2013 at 08:21:36AM -0700, Vincent Lejeune wrote:
> Hi,
> 
> 
> >-------------- next part --------------
> >From dc547a89dac5039ce521f3c27fb23346251d488d Mon Sep 17 00:00:00 2001
> >From: Tom Stellard <thomas.stellard at amd.com>
> >Date: Tue, 7 May 2013 16:26:26 -0400
> >Subject: [PATCH 4/7] R600: Swap the legality of rotl and rotr
> >
> >The hardware supports rotr and not rotl.
> >---
> > lib/Target/R600/AMDGPUISelLowering.cpp |  3 +++
> > lib/Target/R600/AMDGPUISelLowering.h   |  1 -
> > lib/Target/R600/AMDGPUInstrInfo.td     |  6 ------
> > lib/Target/R600/AMDGPUInstructions.td  |  6 ++++++
> > lib/Target/R600/AMDILISelLowering.cpp  |  2 --
> > lib/Target/R600/R600ISelLowering.cpp   | 15 ---------------
> > lib/Target/R600/R600Instructions.td    |  6 ++----
> > test/CodeGen/R600/rotr.ll              | 29 +++++++++++++++++++++++++++++
> > 8 files changed, 40 insertions(+), 28 deletions(-)
> > create mode 100644 test/CodeGen/R600/rotr.ll
> >
> >diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> >index a266df5..b3c51e3 100644
> >--- a/lib/Target/R600/AMDGPUISelLowering.cpp
> >+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> >@@ -46,6 +46,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
> >   setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
> >   setOperationAction(ISD::FRINT,  MVT::f32, Legal);
> > 
> >+  // The hardware supports ROTR, but not ROTL
> >+  setOperationAction(ISD::ROTL, MVT::i32, Expand);
> >+
> >   // Lower floating point store/load to integer store/load to reduce the number
> >   // of patterns in tablegen.
> >   setOperationAction(ISD::STORE, MVT::f32, Promote);
> >diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
> >index c2a79ea..6f8ab8b 100644
> >--- a/lib/Target/R600/AMDGPUISelLowering.h
> >+++ b/lib/Target/R600/AMDGPUISelLowering.h
> >@@ -115,7 +115,6 @@ enum {
> >   RET_FLAG,
> >   BRANCH_COND,
> >   // End AMDIL ISD Opcodes
> >-  BITALIGN,
> >   BUFFER_STORE,
> >   DWORDADDR,
> >   FRACT,
> >diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
> >index b66ae87..a0a3410 100644
> >--- a/lib/Target/R600/AMDGPUInstrInfo.td
> >+++ b/lib/Target/R600/AMDGPUInstrInfo.td
> >@@ -23,12 +23,6 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
> > // AMDGPU DAG Nodes
> > //
> > 
> >-// out = ((a << 32) | b) >> c)
> >-//
> >-// Can be used to optimize rtol:
> >-// rotl(a, b) = bitalign(a, a, 32 - b)
> >-def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
> >-
> > // This argument to this node is a dword address.
> > def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
> > 
> >diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
> >index d2620b2..54df7d0 100644
> >--- a/lib/Target/R600/AMDGPUInstructions.td
> >+++ b/lib/Target/R600/AMDGPUInstructions.td
> >@@ -295,6 +295,12 @@ class BFEPattern <Instruction BFE> : Pat <
> >   (BFE $x, $y, $z)
> > >;
> > 
> >+// rotr pattern
> >+class ROTRPattern <Instruction BIT_ALIGN> : Pat <
> >+  (rotr i32:$src0, i32:$src1),
> >+  (BIT_ALIGN $src0, $src0, $src1)
> >+>;
> >+
> > include "R600Instructions.td"
> > 
> > include "SIInstrInfo.td"
> >diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp
> >index 922cac1..e20dbe0 100644
> >--- a/lib/Target/R600/AMDILISelLowering.cpp
> >+++ b/lib/Target/R600/AMDILISelLowering.cpp
> >@@ -138,8 +138,6 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
> >     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
> >     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
> > 
> >-    // GPU doesn't have a rotl, rotr, or byteswap instruction
> >-    setOperationAction(ISD::ROTR, VT, Expand);
> >     setOperationAction(ISD::BSWAP, VT, Expand);
> > 
> >     // GPU doesn't have any counting operators
> >diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> >index 7252235..e58a8dd 100644
> >--- a/lib/Target/R600/R600ISelLowering.cpp
> >+++ b/lib/Target/R600/R600ISelLowering.cpp
> >@@ -72,8 +72,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
> >   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
> >   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
> > 
> >-  setOperationAction(ISD::ROTL, MVT::i32, Custom);
> >-
> >   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
> >   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
> > 
> >@@ -327,7 +325,6 @@ using namespace llvm::AMDGPUIntrinsic;
> > SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
> >   switch (Op.getOpcode()) {
> >   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
> >-  case ISD::ROTL: return LowerROTL(Op, DAG);
> >   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
> >   case ISD::SELECT: return LowerSELECT(Op, DAG);
> >   case ISD::STORE: return LowerSTORE(Op, DAG);
> >@@ -518,18 +515,6 @@ SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const
> >   return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
> > }
> > 
> >-SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
> >-  DebugLoc DL = Op.getDebugLoc();
> >-  EVT VT = Op.getValueType();
> >-
> >-  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
> >-                     Op.getOperand(0),
> >-                     Op.getOperand(0),
> >-                     DAG.getNode(ISD::SUB, DL, VT,
> >-                                 DAG.getConstant(32, MVT::i32),
> >-                                 Op.getOperand(1)));
> >-}
> >-
> > bool R600TargetLowering::isZero(SDValue Op) const {
> >   if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
> >     return Cst->isNullValue();
> >diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> >index 8f47523..83d465a 100644
> >--- a/lib/Target/R600/R600Instructions.td
> >+++ b/lib/Target/R600/R600Instructions.td
> >@@ -1620,10 +1620,8 @@ let Predicates = [isEGorCayman] in {
> >   def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
> >   defm : BFIPatterns <BFI_INT_eg>;
> > 
> >-  def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
> >-    [(set i32:$dst, (AMDGPUbitalign i32:$src0, i32:$src1, i32:$src2))],
> >-    VecALU
> >-  >;
> >+  def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
> >+  def : ROTRPattern <BIT_ALIGN_INT_eg>;
> 
> Is it possible to move this pattern instantiation into the 5th patch? It would
> make more sense given the commit message of that patch.
> 

I need it in this patch in order to replace the optimization that was
lost when I removed the BITALIGN node; otherwise, this patch will regress
shaders that use rotr or rotl.

-Tom