[Mesa-dev] [PATCH 2/2] AMDGPU: Simplify SI control flow lowering

Christian König deathsimple at vodafone.de
Thu Nov 29 02:12:36 PST 2012


Simplify the SI control flow lowering by using the S_*_SAVEEXEC_B64 instructions.

Signed-off-by: Christian König <deathsimple at vodafone.de>
---
 lib/Target/AMDGPU/SILowerControlFlow.cpp |   79 ++++++++++++++----------------
 1 file changed, 38 insertions(+), 41 deletions(-)

diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp
index b43fdeb..f91e3ea 100644
--- a/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -29,8 +29,7 @@
 //
 // becomes:
 //
-// %SGPR0 = S_MOV_B64 %EXEC          // Save the current exec mask
-// %EXEC = S_AND_B64 %VCC, %EXEC     // Update the exec mask
+// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC  // Save and update the exec mask
 // %SGPR0 = S_XOR_B64 %SGPR0, %EXEC  // Clear live bits from saved exec mask
 // S_CBRANCH_EXECZ label0            // This instruction is an
 //                                   // optimization which allows us to
@@ -39,14 +38,13 @@
 // %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
 //
 // label0:
-// %SGPR2 = S_MOV_B64 %EXEC           // Save the current exec mask
-// %EXEC = S_MOV_B64 %SGPR0           // Restore the exec mask for the Then block
-// %SGPR0 = S_MOV_B64 %SGPR2          // Save the exec mask from the If block
+// %SGPR0 = S_OR_SAVEEXEC_B64 %SGPR0  // Restore the exec mask for the Then block
+// %EXEC = S_XOR_B64 %SGPR0, %EXEC    // Drop the If block's lanes from the exec mask
 // S_BRANCH_EXECZ label1              // Use our branch optimization
 //                                    // instruction again.
 // %VGPR0 = V_SUB_F32 %VGPR0, %VGPR   // Do the THEN block
 // label1:
-// %EXEC = S_OR_B64 %EXEC, %SGPR0     // Re-enable saved exec mask bits
+// %EXEC = S_OR_B64 %EXEC, %SGPR0     // Re-enable saved exec mask bits
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
@@ -69,8 +67,8 @@ private:
   std::vector<unsigned> PredicateStack;
   std::vector<unsigned> UnusedRegisters;
 
-  void pushExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
-  void popExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
+  unsigned allocReg();
+  void freeReg(unsigned Reg);
 
 public:
   SILowerControlFlowPass(TargetMachine &tm) :
@@ -111,34 +109,43 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
                                I != MBB.end(); I = Next) {
       Next = llvm::next(I);
       MachineInstr &MI = *I;
+      unsigned Reg;
       switch (MI.getOpcode()) {
         default: break;
         case AMDGPU::SI_IF_NZ:
-          pushExecMask(MBB, I);
-          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
-                  AMDGPU::EXEC)
-                  .addOperand(MI.getOperand(0)) // VCC
-                  .addReg(AMDGPU::EXEC);
+          Reg = allocReg();
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_SAVEEXEC_B64),
+                  Reg)
+                  .addOperand(MI.getOperand(0)); // VCC
           BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_XOR_B64),
-                  PredicateStack.back())
-                  .addReg(PredicateStack.back())
+                  Reg)
+                  .addReg(Reg)
                   .addReg(AMDGPU::EXEC);
           MI.eraseFromParent();
+          PredicateStack.push_back(Reg);
           break;
+
         case AMDGPU::ELSE:
-          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
-                  UnusedRegisters.back())
-                  .addReg(AMDGPU::EXEC);
-          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
+          Reg = PredicateStack.back();
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_OR_SAVEEXEC_B64),
+                  Reg)
+                  .addReg(Reg);
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_XOR_B64),
                   AMDGPU::EXEC)
-                  .addReg(PredicateStack.back());
-          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
-                  PredicateStack.back())
-                  .addReg(UnusedRegisters.back());
+                  .addReg(Reg)
+                  .addReg(AMDGPU::EXEC);
           MI.eraseFromParent();
           break;
+
         case AMDGPU::ENDIF:
-          popExecMask(MBB, I);
+          Reg = PredicateStack.back();
+          PredicateStack.pop_back();
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_OR_B64),
+                  AMDGPU::EXEC)
+                  .addReg(AMDGPU::EXEC)
+                  .addReg(Reg);
+          freeReg(Reg);
+
 	  if (MF.getInfo<SIMachineFunctionInfo>()->ShaderType == ShaderType::PIXEL &&
 	      PredicateStack.empty()) {
             // If the exec mask is non-zero, skip the next two instructions
@@ -166,28 +173,18 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
       }
     }
   }
-  return false;
+  return true;
 }
 
-void SILowerControlFlowPass::pushExecMask(MachineBasicBlock &MBB,
-                                          MachineBasicBlock::iterator I) {
+unsigned SILowerControlFlowPass::allocReg() {
 
   assert(!UnusedRegisters.empty() && "Ran out of registers for predicate stack");
-  unsigned StackReg = UnusedRegisters.back();
+  unsigned Reg = UnusedRegisters.back();
   UnusedRegisters.pop_back();
-  PredicateStack.push_back(StackReg);
-  BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
-          StackReg)
-          .addReg(AMDGPU::EXEC);
+  return Reg;
 }
 
-void SILowerControlFlowPass::popExecMask(MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator I) {
-  unsigned StackReg = PredicateStack.back();
-  PredicateStack.pop_back();
-  UnusedRegisters.push_back(StackReg);
-  BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_OR_B64),
-          AMDGPU::EXEC)
-          .addReg(AMDGPU::EXEC)
-          .addReg(StackReg);
+void SILowerControlFlowPass::freeReg(unsigned Reg) {
+
+  UnusedRegisters.push_back(Reg);
 }
-- 
1.7.9.5



More information about the mesa-dev mailing list