Mesa (master): radeon/llvm: Use the VLIW Scheduler for R600->NI

Tom Stellard tstellar at kemper.freedesktop.org
Thu Jun 21 20:50:12 UTC 2012


Module: Mesa
Branch: master
Commit: cd287301ec598d2811f3f85c03d23bae01be2359
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=cd287301ec598d2811f3f85c03d23bae01be2359

Author: Tom Stellard <thomas.stellard at amd.com>
Date:   Tue Jun 19 18:47:18 2012 -0400

radeon/llvm: Use the VLIW Scheduler for R600->NI

It's not optimal, but it's better than the register pressure scheduler
that was previously being used.  The VLIW scheduler currently ignores
all the complicated instruction group restrictions and just tries to
fill the instruction groups with as many instructions as possible.
It does, however, know enough not to put two trans-only instructions
in the same group.

We are able to ignore the instruction group restrictions in the LLVM
backend, because the finalizer in r600_asm.c will fix any illegal
instruction groups the backend generates.

Enabling the VLIW scheduler improved the run time for a sha1 compute
shader by about 50%.  I'm not sure what the impact will be for graphics
shaders.  I tested Lightsmark with the VLIW scheduler enabled and the
framerate was about the same, but it might help apps that use really
big shaders.

---

 src/gallium/drivers/radeon/AMDGPUInstructions.td   |    1 +
 src/gallium/drivers/radeon/AMDGPUSubtarget.h       |   36 ++++++++++++++++++++
 src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp |    1 +
 src/gallium/drivers/radeon/AMDGPUTargetMachine.h   |   10 ++++--
 src/gallium/drivers/radeon/AMDILBase.td            |    4 ++
 src/gallium/drivers/radeon/AMDILFormats.td         |    1 +
 src/gallium/drivers/radeon/Makefile                |    3 ++
 src/gallium/drivers/radeon/Makefile.sources        |    3 +-
 src/gallium/drivers/radeon/R600ISelLowering.cpp    |    2 +-
 src/gallium/drivers/radeon/R600InstrInfo.cpp       |   11 ++++++
 src/gallium/drivers/radeon/R600InstrInfo.h         |    5 +++
 src/gallium/drivers/radeon/R600Schedule.td         |    6 ++--
 12 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td
index 9ec9c4d..d6897d5 100644
--- a/src/gallium/drivers/radeon/AMDGPUInstructions.td
+++ b/src/gallium/drivers/radeon/AMDGPUInstructions.td
@@ -21,6 +21,7 @@ class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instructio
   let InOperandList = ins;
   let AsmString = asm;
   let Pattern = pattern;
+  let Itinerary = NullALU;
   let TSFlags{42-40} = Gen;
   let TSFlags{63-48} = AMDILOp;
 }
diff --git a/src/gallium/drivers/radeon/AMDGPUSubtarget.h b/src/gallium/drivers/radeon/AMDGPUSubtarget.h
new file mode 100644
index 0000000..96ace88
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPUSubtarget.h
@@ -0,0 +1,36 @@
+//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDGPU --*- C++ -*-====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file declares the AMDGPU specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDGPUSUBTARGET_H_
+#define _AMDGPUSUBTARGET_H_
+#include "AMDILSubtarget.h"
+
+namespace llvm {
+
+class AMDGPUSubtarget : public AMDILSubtarget // Adds instruction itinerary data on top of AMDILSubtarget.
+{
+  InstrItineraryData InstrItins; // Itinerary data for the selected CPU; assigned in the constructor body.
+
+public:
+  AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) : // TT, CPU and FS are forwarded unchanged to the base class.
+    AMDILSubtarget(TT, CPU, FS)
+  {
+    InstrItins = getInstrItineraryForCPU(CPU); // Lookup keyed by CPU; declared outside this class (presumably generated subtarget info -- confirm).
+  }
+
+  const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } // Reference to the member above, not a copy.
+};
+
+} // End namespace llvm
+
+#endif // _AMDGPUSUBTARGET_H_
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
index c6a2412..7b199f1 100644
--- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
@@ -50,6 +50,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
   FrameLowering(TargetFrameLowering::StackGrowsUp,
       Subtarget.device()->getStackAlignment(), 0),
   IntrinsicInfo(this),
+  InstrItins(&Subtarget.getInstrItineraryData()),
   mDump(false)
 
 {
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h b/src/gallium/drivers/radeon/AMDGPUTargetMachine.h
index 84a1ea3..b419a6f 100644
--- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h
+++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.h
@@ -15,9 +15,9 @@
 #define AMDGPU_TARGET_MACHINE_H
 
 #include "AMDGPUInstrInfo.h"
+#include "AMDGPUSubtarget.h"
 #include "AMDILFrameLowering.h"
 #include "AMDILIntrinsicInfo.h"
-#include "AMDILSubtarget.h"
 #include "R600ISelLowering.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/Target/TargetData.h"
@@ -28,12 +28,13 @@ MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
 
 class AMDGPUTargetMachine : public LLVMTargetMachine {
 
-  AMDILSubtarget Subtarget;
+  AMDGPUSubtarget Subtarget;
   const TargetData DataLayout;
   AMDILFrameLowering FrameLowering;
   AMDILIntrinsicInfo IntrinsicInfo;
   const AMDGPUInstrInfo * InstrInfo;
   AMDGPUTargetLowering * TLInfo;
+  const InstrItineraryData* InstrItins;
   bool mDump;
 
 public:
@@ -50,13 +51,16 @@ public:
      return &IntrinsicInfo;
    }
    virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
-   virtual const AMDILSubtarget *getSubtargetImpl() const {return &Subtarget; }
+   virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
    virtual const AMDGPURegisterInfo *getRegisterInfo() const {
       return &InstrInfo->getRegisterInfo();
    }
    virtual AMDGPUTargetLowering * getTargetLowering() const {
       return TLInfo;
    }
+   virtual const InstrItineraryData* getInstrItineraryData() const {
+      return InstrItins;
+   }
    virtual const TargetData* getTargetData() const { return &DataLayout; }
    virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
    virtual bool addPassesToEmitFile(PassManagerBase &PM,
diff --git a/src/gallium/drivers/radeon/AMDILBase.td b/src/gallium/drivers/radeon/AMDILBase.td
index 31ebed3..8a2d34a 100644
--- a/src/gallium/drivers/radeon/AMDILBase.td
+++ b/src/gallium/drivers/radeon/AMDILBase.td
@@ -11,6 +11,10 @@
 
 include "llvm/Target/Target.td"
 
+// Dummy Instruction itineraries for pseudo instructions
+def ALU_NULL : FuncUnit;
+def NullALU : InstrItinClass;
+
 //===----------------------------------------------------------------------===//
 // AMDIL Subtarget features.
 //===----------------------------------------------------------------------===//
diff --git a/src/gallium/drivers/radeon/AMDILFormats.td b/src/gallium/drivers/radeon/AMDILFormats.td
index 25ca9a0..5418c64 100644
--- a/src/gallium/drivers/radeon/AMDILFormats.td
+++ b/src/gallium/drivers/radeon/AMDILFormats.td
@@ -25,6 +25,7 @@ class ILFormat<ILOpCode op, dag outs, dag ins, string asmstr, list<dag> pattern>
      let Pattern = pattern;
      let AsmString = !strconcat(asmstr, "\n");
      let isPseudo = 1;
+     let Itinerary = NullALU;
      bit hasIEEEFlag = 0;
      bit hasZeroOpFlag = 0;
 }
diff --git a/src/gallium/drivers/radeon/Makefile b/src/gallium/drivers/radeon/Makefile
index 955c41e..ced202f 100644
--- a/src/gallium/drivers/radeon/Makefile
+++ b/src/gallium/drivers/radeon/Makefile
@@ -65,6 +65,9 @@ AMDGPUGenIntrinsics.inc: *.td
 AMDGPUGenCodeEmitter.inc: *.td
 	$(call tablegen, -gen-emitter, AMDGPU.td, $@)
 
+AMDGPUGenDFAPacketizer.inc: *.td
+	$(call tablegen, -gen-dfa-packetizer, AMDGPU.td, $@)
+
 LOADER_LIBS=$(shell llvm-config --libs bitreader asmparser)
 loader: loader.o libradeon.a
 	gcc -o loader -L/usr/local/lib $(LDFLAGS) loader.o libradeon.a $(LLVM_LIBS) $(LOADER_LIBS) -lpthread -ldl -lstdc++ -lm
diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources
index b5665ce..fc7b652 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -12,7 +12,8 @@ GENERATED_SOURCES := \
 	AMDGPUGenSubtargetInfo.inc		\
 	AMDGPUGenEDInfo.inc		\
 	AMDGPUGenIntrinsics.inc		\
-	AMDGPUGenCodeEmitter.inc
+	AMDGPUGenCodeEmitter.inc	\
+	AMDGPUGenDFAPacketizer.inc
 
 CPP_SOURCES := \
 	AMDIL7XXDevice.cpp		\
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp
index bb034be..3e021a2 100644
--- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
+++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
@@ -25,7 +25,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
     TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
 {
   setOperationAction(ISD::MUL, MVT::i64, Expand);
-//  setSchedulingPreference(Sched::VLIW);
   addRegisterClass(MVT::v4f32, &AMDIL::R600_Reg128RegClass);
   addRegisterClass(MVT::f32, &AMDIL::R600_Reg32RegClass);
   addRegisterClass(MVT::v4i32, &AMDIL::R600_Reg128RegClass);
@@ -34,6 +33,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
 
   setOperationAction(ISD::FSUB, MVT::f32, Expand);
 
+  setSchedulingPreference(Sched::VLIW);
 }
 
 MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp
index d1246d3..e11c0bd 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.cpp
+++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp
@@ -13,8 +13,12 @@
 
 #include "R600InstrInfo.h"
 #include "AMDGPUTargetMachine.h"
+#include "AMDILSubtarget.h"
 #include "R600RegisterInfo.h"
 
+#define GET_INSTRINFO_CTOR
+#include "AMDGPUGenDFAPacketizer.inc"
+
 using namespace llvm;
 
 R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
@@ -92,3 +96,10 @@ bool R600InstrInfo::isMov(unsigned Opcode) const
     return true;
   }
 }
+
+DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
+    const ScheduleDAG *DAG) const // DAG is not referenced in this body.
+{ // Creates a DFAPacketizer from the itinerary data exposed by TM.
+  const InstrItineraryData *II = TM->getInstrItineraryData(); // Passed straight through to the packetizer factory below.
+  return TM->getSubtarget<AMDILSubtarget>().createDFAPacketizer(II); // Construction is delegated to the subtarget.
+}
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h
index f2a1098..b9cbcc8 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.h
+++ b/src/gallium/drivers/radeon/R600InstrInfo.h
@@ -23,6 +23,8 @@
 namespace llvm {
 
   class AMDGPUTargetMachine;
+  class DFAPacketizer;
+  class ScheduleDAG;
   class MachineFunction;
   class MachineInstr;
   class MachineInstrBuilder;
@@ -52,6 +54,9 @@ namespace llvm {
 
   virtual unsigned getIEQOpcode() const;
   virtual bool isMov(unsigned Opcode) const;
+
+  DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
+                                           const ScheduleDAG *DAG) const;
 };
 
 } // End llvm namespace
diff --git a/src/gallium/drivers/radeon/R600Schedule.td b/src/gallium/drivers/radeon/R600Schedule.td
index d195790..7ede181 100644
--- a/src/gallium/drivers/radeon/R600Schedule.td
+++ b/src/gallium/drivers/radeon/R600Schedule.td
@@ -20,17 +20,17 @@ def ALU_Z : FuncUnit;
 def ALU_W : FuncUnit;
 def TRANS : FuncUnit;
 
-
 def AnyALU : InstrItinClass;
 def VecALU : InstrItinClass;
 def TransALU : InstrItinClass;
 
 def R600_EG_Itin : ProcessorItineraries <
-  [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS],
+  [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL], // ALU_NULL is the dummy unit declared in AMDILBase.td for pseudo instructions.
   [],
   [
     InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
     InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_X, ALU_W]>]>,
-    InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>
+    InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
+    InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]> // NOTE(review): the VecALU entry above lists ALU_X twice and omits ALU_Z -- looks like a pre-existing typo; confirm.
   ]
 >;




More information about the mesa-commit mailing list