[Mesa-dev] [PATCH] radeon/llvm: Handle TGSI KIL opcode for SI.

Tom Stellard tom at stellard.net
Tue Aug 28 09:07:09 PDT 2012


On Tue, Aug 28, 2012 at 04:26:43PM +0200, Michel Dänzer wrote:
> From: Michel Dänzer <michel.daenzer at amd.com>
> 
> Fixes piglit fp-kil with radeonsi.
>

> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
>  src/gallium/drivers/radeon/SIISelLowering.cpp |   35 +++++++++++++++++++++++++
>  src/gallium/drivers/radeon/SIISelLowering.h   |    2 ++
>  src/gallium/drivers/radeon/SIInstructions.td  |    7 +++++
>  3 files changed, 44 insertions(+)
> 
> diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp
> index 092c2fa..f5eac16 100644
> --- a/src/gallium/drivers/radeon/SIISelLowering.cpp
> +++ b/src/gallium/drivers/radeon/SIISelLowering.cpp
> @@ -129,6 +129,9 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
>    case AMDGPU::SI_INTERP_CONST:
>      LowerSI_INTERP_CONST(MI, *BB, I);
>      break;
> +  case AMDGPU::SI_KIL:
> +    LowerSI_KIL(MI, *BB, I, MRI);
> +    break;
>    case AMDGPU::SI_V_CNDLT:
>      LowerSI_V_CNDLT(MI, *BB, I, MRI);
>      break;
> @@ -193,6 +196,38 @@ void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
>    MI->eraseFromParent();
>  }
>  
> +void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
> +    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
> +{
> +  /* Clear this pixel from the exec mask if the operand is negative */

Please use // style comments in the LLVM code.

> +  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32),
> +          AMDGPU::VCC)
> +          .addReg(AMDGPU::SREG_LIT_0)
> +          .addOperand(MI->getOperand(0));
> +
> +  /* If the exec mask is non-zero, skip the next two instructions */

This comment is misleading, because the branch below tests the VCC status,
not the exec mask status.

> +  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_VCCNZ))
> +          .addImm(3)
> +          .addReg(AMDGPU::VCC);
> +

I'm a little confused about how this is supposed to work. As I understand
it, the program will branch even if just one of the threads in the wavefront
sets its VCC bit (which in this case means the pixel is not killed).
Do we also need to export the exec_mask in the very last export of the
program?

-Tom

> +  /* Exec mask is zero: Export to NULL target... */
> +  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::EXP))
> +          .addImm(0)
> +          .addImm(0x09) // V_008DFC_SQ_EXP_NULL
> +          .addImm(0)
> +          .addImm(1)
> +          .addImm(1)
> +          .addReg(AMDGPU::SREG_LIT_0)
> +          .addReg(AMDGPU::SREG_LIT_0)
> +          .addReg(AMDGPU::SREG_LIT_0)
> +          .addReg(AMDGPU::SREG_LIT_0);
> +
> +  /* ... and terminate wavefront */
> +  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM));
> +
> +  MI->eraseFromParent();
> +}
> +
>  void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
>      MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
>  {
> diff --git a/src/gallium/drivers/radeon/SIISelLowering.h b/src/gallium/drivers/radeon/SIISelLowering.h
> index cf655a1..9609311 100644
> --- a/src/gallium/drivers/radeon/SIISelLowering.h
> +++ b/src/gallium/drivers/radeon/SIISelLowering.h
> @@ -33,6 +33,8 @@ class SITargetLowering : public AMDGPUTargetLowering
>                MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
>    void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
>                MachineBasicBlock::iterator I) const;
> +  void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
> +              MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
>    void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
>                MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
>  
> diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td
> index 3047321..cb43165 100644
> --- a/src/gallium/drivers/radeon/SIInstructions.td
> +++ b/src/gallium/drivers/radeon/SIInstructions.td
> @@ -948,6 +948,13 @@ def SI_INTERP_CONST : InstSI <
>                                                   imm:$attr, SReg_32:$params))]
>  >;
>  
> +def SI_KIL : InstSI <
> +	(outs),
> +	(ins VReg_32:$src),
> +	"SI_KIL $src",
> +	[(int_AMDGPU_kill VReg_32:$src)]
> +>;
> +
>  } // end usesCustomInserter 
>  
>  // SI Psuedo branch instructions.  These are used by the CFG structurizer pass
> -- 
> 1.7.10.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list