[Mesa-dev] R600/SI Patches: A few cleanups for compute
Tom Stellard
tom at stellard.net
Thu May 16 10:13:05 PDT 2013
On Thu, May 16, 2013 at 08:21:36AM -0700, Vincent Lejeune wrote:
> Hi,
>
>
> >-------------- next part --------------
> >>From dc547a89dac5039ce521f3c27fb23346251d488d Mon Sep 17 00:00:00 2001
> >From: Tom Stellard <thomas.stellard at amd.com>
> >Date: Tue, 7 May 2013 16:26:26 -0400
> >Subject: [PATCH 4/7] R600: Swap the legality of rotl and rotr
> >
> >The hardware supports rotr and not rotl.
> >---
> > lib/Target/R600/AMDGPUISelLowering.cpp | 3 +++
> > lib/Target/R600/AMDGPUISelLowering.h | 1 -
> > lib/Target/R600/AMDGPUInstrInfo.td | 6 ------
> > lib/Target/R600/AMDGPUInstructions.td | 6 ++++++
> > lib/Target/R600/AMDILISelLowering.cpp | 2 --
> > lib/Target/R600/R600ISelLowering.cpp | 15 ---------------
> > lib/Target/R600/R600Instructions.td | 6 ++----
> > test/CodeGen/R600/rotr.ll | 29 +++++++++++++++++++++++++++++
> > 8 files changed, 40 insertions(+), 28 deletions(-)
> > create mode 100644 test/CodeGen/R600/rotr.ll
> >
> >diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> >index a266df5..b3c51e3 100644
> >--- a/lib/Target/R600/AMDGPUISelLowering.cpp
> >+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> >@@ -46,6 +46,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
> > setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
> > setOperationAction(ISD::FRINT, MVT::f32, Legal);
> >
> >+ // The hardware supports ROTR, but not ROTL
> >+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
> >+
> > // Lower floating point store/load to integer store/load to reduce the number
> > // of patterns in tablegen.
> > setOperationAction(ISD::STORE, MVT::f32, Promote);
> >diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
> >index c2a79ea..6f8ab8b 100644
> >--- a/lib/Target/R600/AMDGPUISelLowering.h
> >+++ b/lib/Target/R600/AMDGPUISelLowering.h
> >@@ -115,7 +115,6 @@ enum {
> > RET_FLAG,
> > BRANCH_COND,
> > // End AMDIL ISD Opcodes
> >- BITALIGN,
> > BUFFER_STORE,
> > DWORDADDR,
> > FRACT,
> >diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
> >index b66ae87..a0a3410 100644
> >--- a/lib/Target/R600/AMDGPUInstrInfo.td
> >+++ b/lib/Target/R600/AMDGPUInstrInfo.td
> >@@ -23,12 +23,6 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
> > // AMDGPU DAG Nodes
> > //
> >
> >-// out = ((a << 32) | b) >> c)
> >-//
> >-// Can be used to optimize rtol:
> >-// rotl(a, b) = bitalign(a, a, 32 - b)
> >-def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
> >-
> > // This argument to this node is a dword address.
> > def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
> >
> >diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
> >index d2620b2..54df7d0 100644
> >--- a/lib/Target/R600/AMDGPUInstructions.td
> >+++ b/lib/Target/R600/AMDGPUInstructions.td
> >@@ -295,6 +295,12 @@ class BFEPattern <Instruction BFE> : Pat <
> > (BFE $x, $y, $z)
> > >;
> >
> >+// rotr pattern
> >+class ROTRPattern <Instruction BIT_ALIGN> : Pat <
> >+ (rotr i32:$src0, i32:$src1),
> >+ (BIT_ALIGN $src0, $src0, $src1)
> >+>;
> >+
> > include "R600Instructions.td"
> >
> > include "SIInstrInfo.td"
> >diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp
> >index 922cac1..e20dbe0 100644
> >--- a/lib/Target/R600/AMDILISelLowering.cpp
> >+++ b/lib/Target/R600/AMDILISelLowering.cpp
> >@@ -138,8 +138,6 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
> > setOperationAction(ISD::SMUL_LOHI, VT, Expand);
> > setOperationAction(ISD::UMUL_LOHI, VT, Expand);
> >
> >- // GPU doesn't have a rotl, rotr, or byteswap instruction
> >- setOperationAction(ISD::ROTR, VT, Expand);
> > setOperationAction(ISD::BSWAP, VT, Expand);
> >
> > // GPU doesn't have any counting operators
> >diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> >index 7252235..e58a8dd 100644
> >--- a/lib/Target/R600/R600ISelLowering.cpp
> >+++ b/lib/Target/R600/R600ISelLowering.cpp
> >@@ -72,8 +72,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
> > setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
> > setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
> >
> >- setOperationAction(ISD::ROTL, MVT::i32, Custom);
> >-
> > setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
> > setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
> >
> >@@ -327,7 +325,6 @@ using namespace llvm::AMDGPUIntrinsic;
> > SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
> > switch (Op.getOpcode()) {
> > default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
> >- case ISD::ROTL: return LowerROTL(Op, DAG);
> > case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
> > case ISD::SELECT: return LowerSELECT(Op, DAG);
> > case ISD::STORE: return LowerSTORE(Op, DAG);
> >@@ -518,18 +515,6 @@ SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const
> > return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
> > }
> >
> >-SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
> >- DebugLoc DL = Op.getDebugLoc();
> >- EVT VT = Op.getValueType();
> >-
> >- return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
> >- Op.getOperand(0),
> >- Op.getOperand(0),
> >- DAG.getNode(ISD::SUB, DL, VT,
> >- DAG.getConstant(32, MVT::i32),
> >- Op.getOperand(1)));
> >-}
> >-
> > bool R600TargetLowering::isZero(SDValue Op) const {
> > if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
> > return Cst->isNullValue();
> >diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> >index 8f47523..83d465a 100644
> >--- a/lib/Target/R600/R600Instructions.td
> >+++ b/lib/Target/R600/R600Instructions.td
> >@@ -1620,10 +1620,8 @@ let Predicates = [isEGorCayman] in {
> > def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
> > defm : BFIPatterns <BFI_INT_eg>;
> >
> >- def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
> >- [(set i32:$dst, (AMDGPUbitalign i32:$src0, i32:$src1, i32:$src2))],
> >- VecALU
> >- >;
> >+ def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
> >+ def : ROTRPattern <BIT_ALIGN_INT_eg>;
>
> Is it possible to move this pattern instantiation into the 5th patch? It would
> make more sense given the commit message of the patch.
>
I need it in this patch in order to replace the optimization that was
lost when I removed the BITALIGN node; otherwise, this patch will regress
shaders that use rotr or rotl.
-Tom
More information about the mesa-dev
mailing list