[Mesa-dev] [PATCH 2/3] radeon/llvm: support for interpolation intrinsics

Tue Sep 18 07:42:02 PDT 2012

On Tue, Sep 18, 2012 at 03:59:27PM +0200, Vincent Lejeune wrote:
> ---
>  src/gallium/drivers/radeon/AMDGPUISelLowering.cpp  |   2 +
>  src/gallium/drivers/radeon/AMDGPUISelLowering.h    |   2 +
>  src/gallium/drivers/radeon/AMDGPUIntrinsics.td     |   6 ++
>  .../drivers/radeon/R600ExpandSpecialInstrs.cpp     | 111 +++++++++++++++++++++
>  src/gallium/drivers/radeon/R600ISelLowering.cpp    |  88 +++++++++++++++-
>  src/gallium/drivers/radeon/R600ISelLowering.h      |   1 +
>  src/gallium/drivers/radeon/R600Instructions.td     |  57 +++++++++++
>  .../drivers/radeon/R600MachineFunctionInfo.cpp     |  17 +++-
>  .../drivers/radeon/R600MachineFunctionInfo.h       |   5 +
>  9 files changed, 287 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
> index 0a70164..72cd099 100644
> --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
> +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
> @@ -349,5 +349,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
>    NODE_NAME_CASE(SMIN)
>    NODE_NAME_CASE(UMIN)
>    NODE_NAME_CASE(URECIP)
> +  NODE_NAME_CASE(INTERP)
> +  NODE_NAME_CASE(INTERP_P0)
>    }
>  }
> diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h
> index 4c100da..0d79786 100644
> --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h
> +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h
> @@ -121,6 +121,8 @@ enum
>    SMIN,
>    UMIN,
>    URECIP,
> +  INTERP,
> +  INTERP_P0,
>    LAST_AMDGPU_ISD_NUMBER
>  };
>  
> diff --git a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
> index 89cc7e1..df812b1 100644
> --- a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
> +++ b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
> @@ -19,6 +19,12 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
>    def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
>    def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
>  
> +  def int_AMDGPU_load_input_perspective : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_load_input_constant : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_load_input_linear : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_load_input_position : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_load_input_face : Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrNoMem]>;
> + 

Instead of IntrNoMem, these should be IntrReadMem, because they read
from LDS.  Also, if they are only being used by R600, they should use
the R600 prefix and go in R600Intrinsics.td

>    def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
>    def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
>    def int_AMDGPU_cos : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> diff --git a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp
> index 69ab0ff..0945a8d 100644
> --- a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp
> +++ b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp
> @@ -15,6 +15,7 @@
>  #include "R600Defines.h"
>  #include "R600InstrInfo.h"
>  #include "R600RegisterInfo.h"
> +#include "R600MachineFunctionInfo.h"
>  #include "llvm/CodeGen/MachineFunctionPass.h"
>  #include "llvm/CodeGen/MachineInstrBuilder.h"
>  #include "llvm/CodeGen/MachineRegisterInfo.h"
> @@ -29,6 +30,9 @@ private:
>    static char ID;
>    const R600InstrInfo *TII;
>  
> +  bool ExpandInputPerspective(MachineInstr& MI);
> +  bool ExpandInputConstant(MachineInstr& MI);
> +  
>  public:
>    R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
>      TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
> @@ -48,6 +52,108 @@ FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
>    return new R600ExpandSpecialInstrsPass(TM);
>  }
>  
> +bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI)
> +{
> +  const R600RegisterInfo &TRI = TII->getRegisterInfo();
> +  if (MI.getOpcode() != AMDGPU::input_perspective)
> +    return false;
> +  
> +  MachineBasicBlock::iterator I = &MI;
> +  unsigned DstReg = MI.getOperand(0).getReg();
> +  R600MachineFunctionInfo * MFI = MI.getParent()->getParent()
> +      ->getInfo<R600MachineFunctionInfo>();

Coding style: When you declare pointers and references, make sure the symbol is
attached to the variable name (e.g. R600MachineFunctionInfo *MFI)

> +  unsigned IJIndexBase;
> +  switch (MI.getOperand(1).getImm()) {
> +  case 0:
> +    IJIndexBase = MFI->GetIJPerspectiveIndex();
> +    break;
> +  case 1:
> +    IJIndexBase = MFI->GetIJLinearIndex();
> +    break;
> +  default:
> +    assert(0 && "Unknow ij index");
> +  }
> +
> +  for (unsigned i = 0; i < 8; i++) {

Why 8 here?

> +    unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
> +        2 * IJIndexBase + ((i + 1) % 2));
> +    unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
> +        4 * MI.getOperand(2).getImm());
> +    
> +    unsigned Sel;
> +    switch (i % 4) {

Why %4 ?

> +    case 0:Sel = AMDGPU::sel_x;break;
> +    case 1:Sel = AMDGPU::sel_y;break;
> +    case 2:Sel = AMDGPU::sel_z;break;
> +    case 3:Sel = AMDGPU::sel_w;break;
> +    default:break;
> +    }
> +
> +    unsigned Res = TRI.getSubReg(DstReg, Sel);
> +	  
> +    const MCInstrDesc & opcode = (i < 4)?

Coding Style: const MCInstrDesc &opcode
Also, why (i < 4)

> +        TII->get(AMDGPU::INTERP_ZW):
> +        TII->get(AMDGPU::INTERP_XY);
> +  
> +    MachineInstr *NewMI = BuildMI(*(MI.getParent()),
> +        I, MI.getParent()->findDebugLoc(I),
> +        opcode, Res)
> +        .addReg(IJIndex)
> +        .addReg(ReadReg)
> +        .addImm(0);
> +    
> +    if (!(i> 1 && i < 6)) {

Why (i > 1 && i < 6) ?

> +      TII->addFlag(NewMI, 0, MO_FLAG_MASK);
> +    }
> +	  
> +    if (i % 4 !=  3)
> +      TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
> +  }

I'm not quite sure what's happening in this loop, could you add some
comments or replace some of the "magic numbers" with constant variables.

> +  
> +  MI.eraseFromParent();
> +
> +  return true;
> +}
> +
> +bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI)
> +{
> +  const R600RegisterInfo &TRI = TII->getRegisterInfo();
> +  if (MI.getOpcode() != AMDGPU::input_constant)
> +    return false;
> +  
> +  MachineBasicBlock::iterator I = &MI;
> +  unsigned DstReg = MI.getOperand(0).getReg();
> +
> +  for (unsigned i = 0; i < 4; i++) {
> +    unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
> +        4 * MI.getOperand(1).getImm() + i);
> +    
> +    unsigned Sel;
> +    switch (i % 4) {
> +    case 0:Sel = AMDGPU::sel_x;break;
> +    case 1:Sel = AMDGPU::sel_y;break;
> +    case 2:Sel = AMDGPU::sel_z;break;
> +    case 3:Sel = AMDGPU::sel_w;break;
> +    default:break;
> +    }
> +	  
> +    unsigned Res = TRI.getSubReg(DstReg, Sel);
> +  
> +    MachineInstr *NewMI = BuildMI(*(MI.getParent()),
> +        I, MI.getParent()->findDebugLoc(I),
> +        TII->get(AMDGPU::INTERP_LOAD_P0), Res)
> +        .addReg(ReadReg)
> +        .addImm(0);
> +	  
> +    if (i % 4 !=  3)
> +      TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
> +  }
> +  
> +  MI.eraseFromParent();
> +
> +  return true;
> +}
> +
>  bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
>  
>    const R600RegisterInfo &TRI = TII->getRegisterInfo();
> @@ -59,6 +165,11 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
>      while (I != MBB.end()) {
>        MachineInstr &MI = *I;
>        I = llvm::next(I);
> +	
> +	if (ExpandInputPerspective(MI))
> +	  continue;
> +	if (ExpandInputConstant(MI))
> +	  continue;
>  
>        bool IsReduction = TII->isReductionOp(MI.getOpcode());
>        bool IsVector = TII->isVector(MI);
> diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp
> index 5642ee8..354f0fb 100644
> --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
> +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
> @@ -40,6 +40,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
>  
>    setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
>    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
> +  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
>  
>    setOperationAction(ISD::ROTL, MVT::i32, Custom);
>  
> @@ -231,6 +232,29 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
>                .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
>        break;
>      }
> +  case AMDGPU::input_perspective:
> +    {
> +      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
> +
> +      // XXX Be more fine about register reservation
> +      for (unsigned i = 0; i < 4; i ++) {
> +        unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
> +        MFI->ReservedRegs.push_back(ReservedReg);
> +      }
> +
> +      switch (MI->getOperand(1).getImm()) {
> +      case 0:// Perspective
> +        MFI->HasPerspectiveInterpolation = true;
> +        break;
> +      case 1:// Linear
> +        MFI->HasLinearInterpolation = true;
> +        break;
> +      default:
> +        assert(0 && "Unknow ij index");
> +      }
> +
> +      return BB;
> +    }
>    }
>  
>    MI->eraseFromParent();
> @@ -285,7 +309,48 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
>        unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
>        return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
>      }
> -
> +    case AMDGPUIntrinsic::AMDGPU_load_input_perspective: {
> +      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
> +      SDValue FullVector = DAG.getNode(
> +          AMDGPUISD::INTERP,
> +          DL, MVT::v4f32,
> +          DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
> +      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
> +        DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
> +    }
> +    case AMDGPUIntrinsic::AMDGPU_load_input_linear: {
> +      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
> +      SDValue FullVector = DAG.getNode(
> +        AMDGPUISD::INTERP,
> +        DL, MVT::v4f32,
> +        DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
> +      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
> +        DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
> +    }
> +    case AMDGPUIntrinsic::AMDGPU_load_input_constant: {
> +      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
> +      SDValue FullVector = DAG.getNode(
> +        AMDGPUISD::INTERP_P0,
> +        DL, MVT::v4f32,
> +        DAG.getConstant(slot / 4 , MVT::i32));
> +      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
> +          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
> +    }
> +    case AMDGPUIntrinsic::AMDGPU_load_input_position: {
> +      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
> +      unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
> +      SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
> +	    RegIndex, MVT::f32);
> +      if ((slot % 4) == 3) {
> +        return DAG.getNode(ISD::FDIV,
> +            DL, VT,
> +            DAG.getConstantFP(1.0f, MVT::f32),
> +            Reg);
> +      } else {
> +        return Reg;
> +      }
> +    }
> + 
>      case r600_read_ngroups_x:
>        return LowerImplicitParameter(DAG, VT, DL, 0);
>      case r600_read_ngroups_y:
> @@ -338,9 +403,30 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
>    switch (N->getOpcode()) {
>    default: return;
>    case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
> +  case ISD::INTRINSIC_WO_CHAIN:
> +    {
> +      unsigned IntrinsicID =
> +          cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
> +      if (IntrinsicID == AMDGPUIntrinsic::AMDGPU_load_input_face) {
> +        Results.push_back(LowerInputFace(N, DAG));
> +      } else {
> +        return;
> +      }
> +    }
>    }
>  }
>  
> +SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) const
> +{
> +  unsigned slot = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
> +  unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
> +  SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
> +      RegIndex, MVT::f32);
> +  return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1,
> +      Reg, DAG.getConstantFP(0.0f, MVT::f32),
> +      DAG.getCondCode(ISD::SETUGT));
> +}
> +
>  SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const
>  {
>    return DAG.getNode(
> diff --git a/src/gallium/drivers/radeon/R600ISelLowering.h b/src/gallium/drivers/radeon/R600ISelLowering.h
> index 49ea272..7c81f5f 100644
> --- a/src/gallium/drivers/radeon/R600ISelLowering.h
> +++ b/src/gallium/drivers/radeon/R600ISelLowering.h
> @@ -50,6 +50,7 @@ private:
>  
>    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
>    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
> +  SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const;
>    SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
>  };
>  
> diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
> index 75c6825..5756dfc 100644
> --- a/src/gallium/drivers/radeon/R600Instructions.td
> +++ b/src/gallium/drivers/radeon/R600Instructions.td
> @@ -233,6 +233,63 @@ def isEGorCayman : Predicate<"Subtarget.device()"
>  def isR600toCayman : Predicate<
>                       "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
>  
> +//===----------------------------------------------------------------------===//
> +// Interpolation Instructions
> +//===----------------------------------------------------------------------===//
> +
> +def INTERP: SDNode<"AMDGPUISD::INTERP",
> +SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>
> +>;
> +
> +def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
> +SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]>
> +>;
> +
> +let usesCustomInserter = 1 in {
> +def input_perspective :  AMDGPUShaderInst <
> +(outs R600_Reg128:$dst),
> +(ins i32imm:$src0, i32imm:$src1),
> +"input_perspective $src0 $src1 : dst",
> +[(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>;
> +}

Could you add a comment here denoting the end of usesCustomInserter = 1
There are examples of this elsewhere in the file.

> +
> +def input_constant :  AMDGPUShaderInst <
> +(outs R600_Reg128:$dst),
> +(ins i32imm:$src),
> +"input_perspective $src : dst",
> +[(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>;
> +
> +
> +
> +def INTERP_XY : InstR600 <0xD6,
> +(outs R600_Reg32:$dst),
> +(ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
> +"INTERP_XY dst",
> +[], AnyALU
> +>
> +{
> +let FlagOperandIdx = 3;
> +}
> +
> +def INTERP_ZW : InstR600 <0xD7,
> +(outs R600_Reg32:$dst),
> +(ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
> +"INTERP_ZW dst",
> +[], AnyALU
> +>
> +{
> +let FlagOperandIdx = 3;
> +}
> +
> +def INTERP_LOAD_P0 : InstR600 <0xE0,
> +(outs R600_Reg32:$dst),
> +(ins R600_Reg32:$src, i32imm:$flags),
> +"INTERP_LOAD_P0 dst",
> +[], AnyALU
> +>
> +{
> +let FlagOperandIdx = 2;
> +}
> 

Could you add some indentation to these interpolation instruction
declarations?  Two spaces for everything other than "def ..." and the
braces.

>  let Predicates = [isR600toCayman] in { 
>  
> diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
> index 48443fb..efeb96f 100644
> --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
> +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
> @@ -12,5 +12,20 @@
>  using namespace llvm;
>  
>  R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
> -  : MachineFunctionInfo()
> +  : MachineFunctionInfo(), HasLinearInterpolation(false), HasPerspectiveInterpolation(false)

80 character wrap.

>    { }
> +
> +unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const
> +{
> +  assert(HasPerspectiveInterpolation);
> +  return 0;
> +}
> +
> +unsigned R600MachineFunctionInfo::GetIJLinearIndex() const
> +{
> +  assert(HasLinearInterpolation);
> +  if (HasPerspectiveInterpolation)
> +    return 1;
> +  else
> +    return 0;
> +}
> diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
> index 948e192..68211b2 100644
> --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
> +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
> @@ -25,6 +25,11 @@ class R600MachineFunctionInfo : public MachineFunctionInfo {
>  public:
>    R600MachineFunctionInfo(const MachineFunction &MF);
>    std::vector<unsigned> ReservedRegs;
> +  bool HasLinearInterpolation;
> +  bool HasPerspectiveInterpolation;
> +  
> +  unsigned GetIJLinearIndex() const;
> +  unsigned GetIJPerspectiveIndex() const;
>  
>  };
>  
> -- 
> 1.7.11.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev