[Mesa-dev] [PATCH 2/7] AMDGPU: Alternative handling of SI EXEC register for control flow.
Michel Dänzer
michel at daenzer.net
Tue Oct 30 11:39:07 PDT 2012
From: Michel Dänzer <michel.daenzer at amd.com>
This version handles the EXEC register being modified in the if/else blocks,
e.g. for pixel discard.
Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
---
lib/Target/AMDGPU/SIISelLowering.cpp | 20 --------------
lib/Target/AMDGPU/SILowerFlowControl.cpp | 42 +++++++++++++++++++++++++-----
2 files changed, 36 insertions(+), 26 deletions(-)
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index c6f93d7..45f180f 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -228,26 +228,6 @@ void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
.addReg(AMDGPU::SREG_LIT_0)
.addOperand(MI->getOperand(0));
- // If the exec mask is non-zero, skip the next two instructions
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addImm(3)
- .addReg(AMDGPU::EXEC);
-
- // Exec mask is zero: Export to NULL target...
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::EXP))
- .addImm(0)
- .addImm(0x09) // V_008DFC_SQ_EXP_NULL
- .addImm(0)
- .addImm(1)
- .addImm(1)
- .addReg(AMDGPU::SREG_LIT_0)
- .addReg(AMDGPU::SREG_LIT_0)
- .addReg(AMDGPU::SREG_LIT_0)
- .addReg(AMDGPU::SREG_LIT_0);
-
- // ... and terminate wavefront
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM));
-
MI->eraseFromParent();
}
diff --git a/lib/Target/AMDGPU/SILowerFlowControl.cpp b/lib/Target/AMDGPU/SILowerFlowControl.cpp
index 25f113e..b901688 100644
--- a/lib/Target/AMDGPU/SILowerFlowControl.cpp
+++ b/lib/Target/AMDGPU/SILowerFlowControl.cpp
@@ -50,6 +50,7 @@
#include "AMDGPU.h"
#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -117,20 +118,48 @@ bool SILowerFlowControlPass::runOnMachineFunction(MachineFunction &MF) {
AMDGPU::EXEC)
.addOperand(MI.getOperand(0)) // VCC
.addReg(AMDGPU::EXEC);
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_XOR_B64),
+ PredicateStack.back())
+ .addReg(PredicateStack.back())
+ .addReg(AMDGPU::EXEC);
MI.eraseFromParent();
break;
case AMDGPU::ELSE:
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_NOT_B64),
- AMDGPU::EXEC)
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
+ UnusedRegisters.back())
.addReg(AMDGPU::EXEC);
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
AMDGPU::EXEC)
- .addReg(PredicateStack.back())
- .addReg(AMDGPU::EXEC);
+ .addReg(PredicateStack.back());
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
+ PredicateStack.back())
+ .addReg(UnusedRegisters.back());
MI.eraseFromParent();
break;
case AMDGPU::ENDIF:
popExecMask(MBB, I);
+ if (MF.getInfo<SIMachineFunctionInfo>()->ShaderType == ShaderType::PIXEL &&
+ PredicateStack.empty()) {
+ // If the exec mask is non-zero, skip the next two instructions
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(3)
+ .addReg(AMDGPU::EXEC);
+
+ // Exec mask is zero: Export to NULL target...
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::EXP))
+ .addImm(0)
+ .addImm(0x09) // V_008DFC_SQ_EXP_NULL
+ .addImm(0)
+ .addImm(1)
+ .addImm(1)
+ .addReg(AMDGPU::SREG_LIT_0)
+ .addReg(AMDGPU::SREG_LIT_0)
+ .addReg(AMDGPU::SREG_LIT_0)
+ .addReg(AMDGPU::SREG_LIT_0);
+
+ // ... and terminate wavefront
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM));
+ }
MI.eraseFromParent();
break;
}
@@ -156,7 +185,8 @@ void SILowerFlowControlPass::popExecMask(MachineBasicBlock &MBB,
unsigned StackReg = PredicateStack.back();
PredicateStack.pop_back();
UnusedRegisters.push_back(StackReg);
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_OR_B64),
AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
.addReg(StackReg);
}
--
1.7.10.4
More information about the mesa-dev
mailing list