[Mesa-dev] [PATCH 1/2] R600: Optimize and cleanup KILL on SI

Mon Dec 17 14:29:15 PST 2012

On Mon, Dec 17, 2012 at 03:32:23PM +0100, Christian König wrote:
> We shouldn't insert KILL optimization if we don't have a
> kill instruction at all.
> 

For both patches: Reviewed-by: Tom Stellard <thomas.stellard at amd.com>

> Signed-off-by: Christian König <deathsimple at vodafone.de>
> ---
>  lib/Target/AMDGPU/SIISelLowering.cpp     |   14 ----
>  lib/Target/AMDGPU/SIISelLowering.h       |    2 -
>  lib/Target/AMDGPU/SIInstructions.td      |   24 +++---
>  lib/Target/AMDGPU/SILowerControlFlow.cpp |  125 ++++++++++++++++++++----------
>  4 files changed, 94 insertions(+), 71 deletions(-)
> 
> diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
> index cd6e0e9..435b0b3 100644
> --- a/lib/Target/AMDGPU/SIISelLowering.cpp
> +++ b/lib/Target/AMDGPU/SIISelLowering.cpp
> @@ -131,9 +131,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
>    case AMDGPU::SI_INTERP_CONST:
>      LowerSI_INTERP_CONST(MI, *BB, I, MRI);
>      break;
> -  case AMDGPU::SI_KIL:
> -    LowerSI_KIL(MI, *BB, I, MRI);
> -    break;
>    case AMDGPU::SI_WQM:
>      LowerSI_WQM(MI, *BB, I, MRI);
>      break;
> @@ -211,17 +208,6 @@ void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
>    MI->eraseFromParent();
>  }
>  
> -void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
> -    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
> -  // Clear this pixel from the exec mask if the operand is negative
> -  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32),
> -          AMDGPU::VCC)
> -          .addReg(AMDGPU::SREG_LIT_0)
> -          .addOperand(MI->getOperand(0));
> -
> -  MI->eraseFromParent();
> -}
> -
>  void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
>      MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
>    unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
> diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
> index c088112..db36eef 100644
> --- a/lib/Target/AMDGPU/SIISelLowering.h
> +++ b/lib/Target/AMDGPU/SIISelLowering.h
> @@ -34,8 +34,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
>                MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
>    void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
>                MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
> -  void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
> -              MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
>    void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
>                MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
>    void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
> diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
> index 005be96..cac42da 100644
> --- a/lib/Target/AMDGPU/SIInstructions.td
> +++ b/lib/Target/AMDGPU/SIInstructions.td
> @@ -1080,13 +1080,6 @@ def SI_INTERP_CONST : InstSI <
>                                                   imm:$attr, SReg_32:$params))]
>  >;
>  
> -def SI_KIL : InstSI <
> -  (outs),
> -  (ins VReg_32:$src),
> -  "SI_KIL $src",
> -  [(int_AMDGPU_kill VReg_32:$src)]
> ->;
> -
>  def SI_WQM : InstSI <
>    (outs),
>    (ins),
> @@ -1157,11 +1150,23 @@ def SI_END_CF : InstSI <
>    [(int_SI_end_cf SReg_64:$saved)]
>  >;
>  
> +def SI_KILL : InstSI <
> +  (outs),
> +  (ins VReg_32:$src),
> +  "SI_KIL $src",
> +  [(int_AMDGPU_kill VReg_32:$src)]
> +>;
> +
>  } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
>    // Uses = [EXEC], Defs = [EXEC]
>  
>  } // end IsCodeGenOnly, isPseudo
>  
> +def : Pat <
> +  (int_AMDGPU_kilp),
> +  (SI_KILL (V_MOV_IMM_I32 0xbf800000))
> +>;
> +
>  /* int_SI_vs_load_input */
>  def : Pat<
>    (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
> @@ -1315,11 +1320,6 @@ def : Pat<
>  >;
>  
>  def : Pat <
> -  (int_AMDGPU_kilp),
> -  (SI_KIL (V_MOV_IMM_I32 0xbf800000))
> ->;
> -
> -def : Pat <
>    (int_AMDGPU_cube VReg_128:$src),
>    (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
>      (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
> diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp
> index 507cb54..34e5f17 100644
> --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp
> +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp
> @@ -68,7 +68,10 @@ private:
>    static char ID;
>    const TargetInstrInfo *TII;
>  
> -  void Skip(MachineInstr &MI, MachineOperand &To);
> +  bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
> +
> +  void Skip(MachineInstr &From, MachineOperand &To);
> +  void SkipIfDead(MachineInstr &MI);
>  
>    void If(MachineInstr &MI);
>    void Else(MachineInstr &MI);
> @@ -78,6 +81,7 @@ private:
>    void Loop(MachineInstr &MI);
>    void EndCf(MachineInstr &MI);
>  
> +  void Kill(MachineInstr &MI);
>    void Branch(MachineInstr &MI);
>  
>  public:
> @@ -100,23 +104,29 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
>    return new SILowerControlFlowPass(tm);
>  }
>  
> -void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
> +bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
> +                                        MachineBasicBlock *To) {
>  
>    unsigned NumInstr = 0;
>  
> -  for (MachineBasicBlock *MBB = *From.getParent()->succ_begin();
> -       NumInstr < SkipThreshold && MBB != To.getMBB() && !MBB->succ_empty();
> +  for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
>         MBB = *MBB->succ_begin()) {
>  
>      for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
>           NumInstr < SkipThreshold && I != E; ++I) {
>  
>        if (I->isBundle() || !I->isBundled())
> -        ++NumInstr;
> +        if (++NumInstr >= SkipThreshold)
> +          return true;
>      }
>    }
>  
> -  if (NumInstr < SkipThreshold)
> +  return false;
> +}
> +
> +void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
> +
> +  if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
>      return;
>  
>    DebugLoc DL = From.getDebugLoc();
> @@ -125,6 +135,38 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
>            .addReg(AMDGPU::EXEC);
>  }
>  
> +void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
> +
> +  MachineBasicBlock &MBB = *MI.getParent();
> +  DebugLoc DL = MI.getDebugLoc();
> +
> +  if (!shouldSkip(&MBB, &MBB.getParent()->back()))
> +    return;
> +
> +  MachineBasicBlock::iterator Insert = &MI;
> +  ++Insert;
> +
> +  // If the exec mask is non-zero, skip the next two instructions
> +  BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
> +          .addImm(3)
> +          .addReg(AMDGPU::EXEC);
> +
> +  // Exec mask is zero: Export to NULL target...
> +  BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
> +          .addImm(0)
> +          .addImm(0x09) // V_008DFC_SQ_EXP_NULL
> +          .addImm(0)
> +          .addImm(1)
> +          .addImm(1)
> +          .addReg(AMDGPU::SREG_LIT_0)
> +          .addReg(AMDGPU::SREG_LIT_0)
> +          .addReg(AMDGPU::SREG_LIT_0)
> +          .addReg(AMDGPU::SREG_LIT_0);
> +
> +  // ... and terminate wavefront
> +  BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
> +}
> +
>  void SILowerControlFlowPass::If(MachineInstr &MI) {
>  
>    MachineBasicBlock &MBB = *MI.getParent();
> @@ -251,9 +293,28 @@ void SILowerControlFlowPass::Branch(MachineInstr &MI) {
>      assert(0);
>  }
>  
> +void SILowerControlFlowPass::Kill(MachineInstr &MI) {
> +
> +  MachineBasicBlock &MBB = *MI.getParent();
> +  DebugLoc DL = MI.getDebugLoc();
> +
> +  // Kill is only allowed in pixel shaders
> +  MachineFunction &MF = *MBB.getParent();
> +  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
> +  assert(Info->ShaderType == ShaderType::PIXEL);
> +
> +  // Clear this pixel from the exec mask if the operand is negative
> +  BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
> +          .addReg(AMDGPU::SREG_LIT_0)
> +          .addOperand(MI.getOperand(0));
> +
> +  MI.eraseFromParent();
> +}
> +
>  bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
>  
> -  bool HaveCf = false;
> +  bool HaveKill = false;
> +  unsigned Depth = 0;
>  
>    for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
>         BI != BE; ++BI) {
> @@ -267,6 +328,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
>        switch (MI.getOpcode()) {
>          default: break;
>          case AMDGPU::SI_IF:
> +          ++Depth;
>            If(MI);
>            break;
>  
> @@ -287,14 +349,26 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
>            break;
>  
>          case AMDGPU::SI_LOOP:
> +          ++Depth;
>            Loop(MI);
>            break;
>  
>          case AMDGPU::SI_END_CF:
> -          HaveCf = true;
> +          if (--Depth == 0 && HaveKill) {
> +            SkipIfDead(MI);
> +            HaveKill = false;
> +          }
>            EndCf(MI);
>            break;
>  
> +        case AMDGPU::SI_KILL:
> +          if (Depth == 0)
> +            SkipIfDead(MI);
> +          else
> +            HaveKill = true;
> +          Kill(MI);
> +          break;
> +
>          case AMDGPU::S_BRANCH:
>            Branch(MI);
>            break;
> @@ -302,40 +376,5 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
>      }
>    }
>  
> -  // TODO: What is this good for?
> -  unsigned ShaderType = MF.getInfo<SIMachineFunctionInfo>()->ShaderType;
> -  if (HaveCf && ShaderType == ShaderType::PIXEL) {
> -    for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
> -         BI != BE; ++BI) {
> -
> -      MachineBasicBlock &MBB = *BI;
> -      if (MBB.succ_empty()) {
> -
> -        MachineInstr &MI = *MBB.getFirstNonPHI();
> -        DebugLoc DL = MI.getDebugLoc();
> -
> -        // If the exec mask is non-zero, skip the next two instructions
> -        BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
> -               .addImm(3)
> -               .addReg(AMDGPU::EXEC);
> -
> -        // Exec mask is zero: Export to NULL target...
> -        BuildMI(MBB, &MI, DL, TII->get(AMDGPU::EXP))
> -                .addImm(0)
> -                .addImm(0x09) // V_008DFC_SQ_EXP_NULL
> -                .addImm(0)
> -                .addImm(1)
> -                .addImm(1)
> -                .addReg(AMDGPU::SREG_LIT_0)
> -                .addReg(AMDGPU::SREG_LIT_0)
> -                .addReg(AMDGPU::SREG_LIT_0)
> -                .addReg(AMDGPU::SREG_LIT_0);
> -
> -        // ... and terminate wavefront
> -        BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ENDPGM));
> -      }
> -    }
> -  }
> -
>    return true;
>  }
> -- 
> 1.7.9.5
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev