[Mesa-dev] [PATCH 1/2] R600: Optimize and cleanup KILL on SI
Tom Stellard
tom at stellard.net
Mon Dec 17 14:29:15 PST 2012
On Mon, Dec 17, 2012 at 03:32:23PM +0100, Christian König wrote:
> We shouldn't insert KILL optimization if we don't have a
> kill instruction at all.
>
For both patches: Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> Signed-off-by: Christian König <deathsimple at vodafone.de>
> ---
> lib/Target/AMDGPU/SIISelLowering.cpp | 14 ----
> lib/Target/AMDGPU/SIISelLowering.h | 2 -
> lib/Target/AMDGPU/SIInstructions.td | 24 +++---
> lib/Target/AMDGPU/SILowerControlFlow.cpp | 125 ++++++++++++++++++++----------
> 4 files changed, 94 insertions(+), 71 deletions(-)
>
> diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
> index cd6e0e9..435b0b3 100644
> --- a/lib/Target/AMDGPU/SIISelLowering.cpp
> +++ b/lib/Target/AMDGPU/SIISelLowering.cpp
> @@ -131,9 +131,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
> case AMDGPU::SI_INTERP_CONST:
> LowerSI_INTERP_CONST(MI, *BB, I, MRI);
> break;
> - case AMDGPU::SI_KIL:
> - LowerSI_KIL(MI, *BB, I, MRI);
> - break;
> case AMDGPU::SI_WQM:
> LowerSI_WQM(MI, *BB, I, MRI);
> break;
> @@ -211,17 +208,6 @@ void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
> MI->eraseFromParent();
> }
>
> -void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
> - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
> - // Clear this pixel from the exec mask if the operand is negative
> - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32),
> - AMDGPU::VCC)
> - .addReg(AMDGPU::SREG_LIT_0)
> - .addOperand(MI->getOperand(0));
> -
> - MI->eraseFromParent();
> -}
> -
> void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
> MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
> unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
> diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
> index c088112..db36eef 100644
> --- a/lib/Target/AMDGPU/SIISelLowering.h
> +++ b/lib/Target/AMDGPU/SIISelLowering.h
> @@ -34,8 +34,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
> MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
> void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
> MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
> - void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
> - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
> void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
> MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
> void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
> diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
> index 005be96..cac42da 100644
> --- a/lib/Target/AMDGPU/SIInstructions.td
> +++ b/lib/Target/AMDGPU/SIInstructions.td
> @@ -1080,13 +1080,6 @@ def SI_INTERP_CONST : InstSI <
> imm:$attr, SReg_32:$params))]
> >;
>
> -def SI_KIL : InstSI <
> - (outs),
> - (ins VReg_32:$src),
> - "SI_KIL $src",
> - [(int_AMDGPU_kill VReg_32:$src)]
> ->;
> -
> def SI_WQM : InstSI <
> (outs),
> (ins),
> @@ -1157,11 +1150,23 @@ def SI_END_CF : InstSI <
> [(int_SI_end_cf SReg_64:$saved)]
> >;
>
> +def SI_KILL : InstSI <
> + (outs),
> + (ins VReg_32:$src),
> + "SI_KIL $src",
> + [(int_AMDGPU_kill VReg_32:$src)]
> +>;
> +
> } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
> // Uses = [EXEC], Defs = [EXEC]
>
> } // end IsCodeGenOnly, isPseudo
>
> +def : Pat <
> + (int_AMDGPU_kilp),
> + (SI_KILL (V_MOV_IMM_I32 0xbf800000))
> +>;
> +
> /* int_SI_vs_load_input */
> def : Pat<
> (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
> @@ -1315,11 +1320,6 @@ def : Pat<
> >;
>
> def : Pat <
> - (int_AMDGPU_kilp),
> - (SI_KIL (V_MOV_IMM_I32 0xbf800000))
> ->;
> -
> -def : Pat <
> (int_AMDGPU_cube VReg_128:$src),
> (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
> (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
> diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp
> index 507cb54..34e5f17 100644
> --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp
> +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp
> @@ -68,7 +68,10 @@ private:
> static char ID;
> const TargetInstrInfo *TII;
>
> - void Skip(MachineInstr &MI, MachineOperand &To);
> + bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
> +
> + void Skip(MachineInstr &From, MachineOperand &To);
> + void SkipIfDead(MachineInstr &MI);
>
> void If(MachineInstr &MI);
> void Else(MachineInstr &MI);
> @@ -78,6 +81,7 @@ private:
> void Loop(MachineInstr &MI);
> void EndCf(MachineInstr &MI);
>
> + void Kill(MachineInstr &MI);
> void Branch(MachineInstr &MI);
>
> public:
> @@ -100,23 +104,29 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
> return new SILowerControlFlowPass(tm);
> }
>
> -void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
> +bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
> + MachineBasicBlock *To) {
>
> unsigned NumInstr = 0;
>
> - for (MachineBasicBlock *MBB = *From.getParent()->succ_begin();
> - NumInstr < SkipThreshold && MBB != To.getMBB() && !MBB->succ_empty();
> + for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
> MBB = *MBB->succ_begin()) {
>
> for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
> NumInstr < SkipThreshold && I != E; ++I) {
>
> if (I->isBundle() || !I->isBundled())
> - ++NumInstr;
> + if (++NumInstr >= SkipThreshold)
> + return true;
> }
> }
>
> - if (NumInstr < SkipThreshold)
> + return false;
> +}
> +
> +void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
> +
> + if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
> return;
>
> DebugLoc DL = From.getDebugLoc();
> @@ -125,6 +135,38 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
> .addReg(AMDGPU::EXEC);
> }
>
> +void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
> +
> + MachineBasicBlock &MBB = *MI.getParent();
> + DebugLoc DL = MI.getDebugLoc();
> +
> + if (!shouldSkip(&MBB, &MBB.getParent()->back()))
> + return;
> +
> + MachineBasicBlock::iterator Insert = &MI;
> + ++Insert;
> +
> + // If the exec mask is non-zero, skip the next two instructions
> + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
> + .addImm(3)
> + .addReg(AMDGPU::EXEC);
> +
> + // Exec mask is zero: Export to NULL target...
> + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
> + .addImm(0)
> + .addImm(0x09) // V_008DFC_SQ_EXP_NULL
> + .addImm(0)
> + .addImm(1)
> + .addImm(1)
> + .addReg(AMDGPU::SREG_LIT_0)
> + .addReg(AMDGPU::SREG_LIT_0)
> + .addReg(AMDGPU::SREG_LIT_0)
> + .addReg(AMDGPU::SREG_LIT_0);
> +
> + // ... and terminate wavefront
> + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
> +}
> +
> void SILowerControlFlowPass::If(MachineInstr &MI) {
>
> MachineBasicBlock &MBB = *MI.getParent();
> @@ -251,9 +293,28 @@ void SILowerControlFlowPass::Branch(MachineInstr &MI) {
> assert(0);
> }
>
> +void SILowerControlFlowPass::Kill(MachineInstr &MI) {
> +
> + MachineBasicBlock &MBB = *MI.getParent();
> + DebugLoc DL = MI.getDebugLoc();
> +
> + // Kill is only allowed in pixel shaders
> + MachineFunction &MF = *MBB.getParent();
> + SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
> + assert(Info->ShaderType == ShaderType::PIXEL);
> +
> + // Clear this pixel from the exec mask if the operand is negative
> + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
> + .addReg(AMDGPU::SREG_LIT_0)
> + .addOperand(MI.getOperand(0));
> +
> + MI.eraseFromParent();
> +}
> +
> bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
>
> - bool HaveCf = false;
> + bool HaveKill = false;
> + unsigned Depth = 0;
>
> for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
> BI != BE; ++BI) {
> @@ -267,6 +328,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
> switch (MI.getOpcode()) {
> default: break;
> case AMDGPU::SI_IF:
> + ++Depth;
> If(MI);
> break;
>
> @@ -287,14 +349,26 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
> break;
>
> case AMDGPU::SI_LOOP:
> + ++Depth;
> Loop(MI);
> break;
>
> case AMDGPU::SI_END_CF:
> - HaveCf = true;
> + if (--Depth == 0 && HaveKill) {
> + SkipIfDead(MI);
> + HaveKill = false;
> + }
> EndCf(MI);
> break;
>
> + case AMDGPU::SI_KILL:
> + if (Depth == 0)
> + SkipIfDead(MI);
> + else
> + HaveKill = true;
> + Kill(MI);
> + break;
> +
> case AMDGPU::S_BRANCH:
> Branch(MI);
> break;
> @@ -302,40 +376,5 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
> }
> }
>
> - // TODO: What is this good for?
> - unsigned ShaderType = MF.getInfo<SIMachineFunctionInfo>()->ShaderType;
> - if (HaveCf && ShaderType == ShaderType::PIXEL) {
> - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
> - BI != BE; ++BI) {
> -
> - MachineBasicBlock &MBB = *BI;
> - if (MBB.succ_empty()) {
> -
> - MachineInstr &MI = *MBB.getFirstNonPHI();
> - DebugLoc DL = MI.getDebugLoc();
> -
> - // If the exec mask is non-zero, skip the next two instructions
> - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
> - .addImm(3)
> - .addReg(AMDGPU::EXEC);
> -
> - // Exec mask is zero: Export to NULL target...
> - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::EXP))
> - .addImm(0)
> - .addImm(0x09) // V_008DFC_SQ_EXP_NULL
> - .addImm(0)
> - .addImm(1)
> - .addImm(1)
> - .addReg(AMDGPU::SREG_LIT_0)
> - .addReg(AMDGPU::SREG_LIT_0)
> - .addReg(AMDGPU::SREG_LIT_0)
> - .addReg(AMDGPU::SREG_LIT_0);
> -
> - // ... and terminate wavefront
> - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ENDPGM));
> - }
> - }
> - }
> -
> return true;
> }
> --
> 1.7.9.5
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list