[Mesa-dev] [PATCH] radeon/llvm: support for interpolation intrinsics
Tom Stellard
tom at stellard.net
Fri Sep 21 08:44:24 PDT 2012
On Tue, Sep 18, 2012 at 10:57:22PM +0200, Vincent Lejeune wrote:
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
This patch is OK to commit if there are no regressions with the LLVM
compiler.
> ---
> src/gallium/drivers/radeon/AMDGPUISelLowering.cpp | 2 +
> src/gallium/drivers/radeon/AMDGPUISelLowering.h | 2 +
> .../drivers/radeon/R600ExpandSpecialInstrs.cpp | 129 +++++++++++++++++++++
> src/gallium/drivers/radeon/R600ISelLowering.cpp | 88 +++++++++++++-
> src/gallium/drivers/radeon/R600ISelLowering.h | 1 +
> src/gallium/drivers/radeon/R600Instructions.td | 54 +++++++++
> .../drivers/radeon/R600IntrinsicsNoOpenCL.td | 10 ++
> src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td | 10 ++
> .../drivers/radeon/R600MachineFunctionInfo.cpp | 19 ++-
> .../drivers/radeon/R600MachineFunctionInfo.h | 5 +
> 10 files changed, 318 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
> index 0a70164..72cd099 100644
> --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
> +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
> @@ -349,5 +349,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
> NODE_NAME_CASE(SMIN)
> NODE_NAME_CASE(UMIN)
> NODE_NAME_CASE(URECIP)
> + NODE_NAME_CASE(INTERP)
> + NODE_NAME_CASE(INTERP_P0)
> }
> }
> diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h
> index 4c100da..0d79786 100644
> --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h
> +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h
> @@ -121,6 +121,8 @@ enum
> SMIN,
> UMIN,
> URECIP,
> + INTERP,
> + INTERP_P0,
> LAST_AMDGPU_ISD_NUMBER
> };
>
> diff --git a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp
> index 69ab0ff..d6184e5 100644
> --- a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp
> +++ b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp
> @@ -15,6 +15,7 @@
> #include "R600Defines.h"
> #include "R600InstrInfo.h"
> #include "R600RegisterInfo.h"
> +#include "R600MachineFunctionInfo.h"
> #include "llvm/CodeGen/MachineFunctionPass.h"
> #include "llvm/CodeGen/MachineInstrBuilder.h"
> #include "llvm/CodeGen/MachineRegisterInfo.h"
> @@ -29,6 +30,9 @@ private:
> static char ID;
> const R600InstrInfo *TII;
>
> + bool ExpandInputPerspective(MachineInstr& MI);
> + bool ExpandInputConstant(MachineInstr& MI);
> +
> public:
> R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
> TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
> @@ -48,6 +52,126 @@ FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
> return new R600ExpandSpecialInstrsPass(TM);
> }
>
> +bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI)
> +{
> + const R600RegisterInfo &TRI = TII->getRegisterInfo();
> + if (MI.getOpcode() != AMDGPU::input_perspective)
> + return false;
> +
> + MachineBasicBlock::iterator I = &MI;
> + unsigned DstReg = MI.getOperand(0).getReg();
> + R600MachineFunctionInfo *MFI = MI.getParent()->getParent()
> + ->getInfo<R600MachineFunctionInfo>();
> + unsigned IJIndexBase;
> +
> + // In Evergreen ISA doc section 8.3.2 :
> + // We need to interpolate XY and ZW in two different instruction groups.
> + // An INTERP_* must occupy all 4 slots of an instruction group.
> + // Output of INTERP_XY is written in X,Y slots
> + // Output of INTERP_ZW is written in Z,W slots
> + //
> + // Thus interpolation requires the following sequences :
> + //
> + // AnyGPR.x = INTERP_ZW; (Write Masked Out)
> + // AnyGPR.y = INTERP_ZW; (Write Masked Out)
> + // DstGPR.z = INTERP_ZW;
> + // DstGPR.w = INTERP_ZW; (End of first IG)
> + // DstGPR.x = INTERP_XY;
> + // DstGPR.y = INTERP_XY;
> + // AnyGPR.z = INTERP_XY; (Write Masked Out)
> + // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
> + //
> + switch (MI.getOperand(1).getImm()) {
> + case 0:
> + IJIndexBase = MFI->GetIJPerspectiveIndex();
> + break;
> + case 1:
> + IJIndexBase = MFI->GetIJLinearIndex();
> + break;
> + default:
> + assert(0 && "Unknow ij index");
> + }
> +
> + for (unsigned i = 0; i < 8; i++) {
> + unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
> + 2 * IJIndexBase + ((i + 1) % 2));
> + unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
> + 4 * MI.getOperand(2).getImm());
> +
> + unsigned Sel;
> + switch (i % 4) {
> + case 0:Sel = AMDGPU::sel_x;break;
> + case 1:Sel = AMDGPU::sel_y;break;
> + case 2:Sel = AMDGPU::sel_z;break;
> + case 3:Sel = AMDGPU::sel_w;break;
> + default:break;
> + }
> +
> + unsigned Res = TRI.getSubReg(DstReg, Sel);
> +
> + const MCInstrDesc &Opcode = (i < 4)?
> + TII->get(AMDGPU::INTERP_ZW):
> + TII->get(AMDGPU::INTERP_XY);
> +
> + MachineInstr *NewMI = BuildMI(*(MI.getParent()),
> + I, MI.getParent()->findDebugLoc(I),
> + Opcode, Res)
> + .addReg(IJIndex)
> + .addReg(ReadReg)
> + .addImm(0);
> +
> + if (!(i> 1 && i < 6)) {
> + TII->addFlag(NewMI, 0, MO_FLAG_MASK);
> + }
> +
> + if (i % 4 != 3)
> + TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
> + }
> +
> + MI.eraseFromParent();
> +
> + return true;
> +}
> +
> +bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI)
> +{
> + const R600RegisterInfo &TRI = TII->getRegisterInfo();
> + if (MI.getOpcode() != AMDGPU::input_constant)
> + return false;
> +
> + MachineBasicBlock::iterator I = &MI;
> + unsigned DstReg = MI.getOperand(0).getReg();
> +
> + for (unsigned i = 0; i < 4; i++) {
> + unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
> + 4 * MI.getOperand(1).getImm() + i);
> +
> + unsigned Sel;
> + switch (i % 4) {
> + case 0:Sel = AMDGPU::sel_x;break;
> + case 1:Sel = AMDGPU::sel_y;break;
> + case 2:Sel = AMDGPU::sel_z;break;
> + case 3:Sel = AMDGPU::sel_w;break;
> + default:break;
> + }
> +
> + unsigned Res = TRI.getSubReg(DstReg, Sel);
> +
> + MachineInstr *NewMI = BuildMI(*(MI.getParent()),
> + I, MI.getParent()->findDebugLoc(I),
> + TII->get(AMDGPU::INTERP_LOAD_P0), Res)
> + .addReg(ReadReg)
> + .addImm(0);
> +
> + if (i % 4 != 3)
> + TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
> + }
> +
> + MI.eraseFromParent();
> +
> + return true;
> +}
> +
> bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
>
> const R600RegisterInfo &TRI = TII->getRegisterInfo();
> @@ -59,6 +183,11 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
> while (I != MBB.end()) {
> MachineInstr &MI = *I;
> I = llvm::next(I);
> +
> + if (ExpandInputPerspective(MI))
> + continue;
> + if (ExpandInputConstant(MI))
> + continue;
>
> bool IsReduction = TII->isReductionOp(MI.getOpcode());
> bool IsVector = TII->isVector(MI);
> diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp
> index 5642ee8..379bd48 100644
> --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
> +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
> @@ -40,6 +40,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
>
> setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
> setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
> + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
>
> setOperationAction(ISD::ROTL, MVT::i32, Custom);
>
> @@ -231,6 +232,29 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
> .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
> break;
> }
> + case AMDGPU::input_perspective:
> + {
> + R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
> +
> + // XXX Be more fine about register reservation
> + for (unsigned i = 0; i < 4; i ++) {
> + unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
> + MFI->ReservedRegs.push_back(ReservedReg);
> + }
> +
> + switch (MI->getOperand(1).getImm()) {
> + case 0:// Perspective
> + MFI->HasPerspectiveInterpolation = true;
> + break;
> + case 1:// Linear
> + MFI->HasLinearInterpolation = true;
> + break;
> + default:
> + assert(0 && "Unknow ij index");
> + }
> +
> + return BB;
> + }
> }
>
> MI->eraseFromParent();
> @@ -285,7 +309,48 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
> unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
> return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
> }
> -
> + case AMDGPUIntrinsic::R600_load_input_perspective: {
> + unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
> + SDValue FullVector = DAG.getNode(
> + AMDGPUISD::INTERP,
> + DL, MVT::v4f32,
> + DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
> + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
> + DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
> + }
> + case AMDGPUIntrinsic::R600_load_input_linear: {
> + unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
> + SDValue FullVector = DAG.getNode(
> + AMDGPUISD::INTERP,
> + DL, MVT::v4f32,
> + DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
> + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
> + DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
> + }
> + case AMDGPUIntrinsic::R600_load_input_constant: {
> + unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
> + SDValue FullVector = DAG.getNode(
> + AMDGPUISD::INTERP_P0,
> + DL, MVT::v4f32,
> + DAG.getConstant(slot / 4 , MVT::i32));
> + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
> + DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
> + }
> + case AMDGPUIntrinsic::R600_load_input_position: {
> + unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
> + unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
> + SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
> + RegIndex, MVT::f32);
> + if ((slot % 4) == 3) {
> + return DAG.getNode(ISD::FDIV,
> + DL, VT,
> + DAG.getConstantFP(1.0f, MVT::f32),
> + Reg);
> + } else {
> + return Reg;
> + }
> + }
> +
> case r600_read_ngroups_x:
> return LowerImplicitParameter(DAG, VT, DL, 0);
> case r600_read_ngroups_y:
> @@ -338,9 +403,30 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
> switch (N->getOpcode()) {
> default: return;
> case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
> + case ISD::INTRINSIC_WO_CHAIN:
> + {
> + unsigned IntrinsicID =
> + cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
> + if (IntrinsicID == AMDGPUIntrinsic::R600_load_input_face) {
> + Results.push_back(LowerInputFace(N, DAG));
> + } else {
> + return;
> + }
> + }
> }
> }
>
> +SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) const
> +{
> + unsigned slot = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
> + unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
> + SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
> + RegIndex, MVT::f32);
> + return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1,
> + Reg, DAG.getConstantFP(0.0f, MVT::f32),
> + DAG.getCondCode(ISD::SETUGT));
> +}
> +
> SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const
> {
> return DAG.getNode(
> diff --git a/src/gallium/drivers/radeon/R600ISelLowering.h b/src/gallium/drivers/radeon/R600ISelLowering.h
> index 49ea272..7c81f5f 100644
> --- a/src/gallium/drivers/radeon/R600ISelLowering.h
> +++ b/src/gallium/drivers/radeon/R600ISelLowering.h
> @@ -50,6 +50,7 @@ private:
>
> SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
> + SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const;
> SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
> };
>
> diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
> index 75c6825..92a3e38 100644
> --- a/src/gallium/drivers/radeon/R600Instructions.td
> +++ b/src/gallium/drivers/radeon/R600Instructions.td
> @@ -233,6 +233,60 @@ def isEGorCayman : Predicate<"Subtarget.device()"
> def isR600toCayman : Predicate<
> "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
>
> +//===----------------------------------------------------------------------===//
> +// Interpolation Instructions
> +//===----------------------------------------------------------------------===//
> +
> +def INTERP: SDNode<"AMDGPUISD::INTERP",
> + SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>
> + >;
> +
> +def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
> + SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]>
> + >;
> +
> +let usesCustomInserter = 1 in {
> +def input_perspective : AMDGPUShaderInst <
> + (outs R600_Reg128:$dst),
> + (ins i32imm:$src0, i32imm:$src1),
> + "input_perspective $src0 $src1 : dst",
> + [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>;
> +} // End usesCustomInserter = 1
> +
> +def input_constant : AMDGPUShaderInst <
> + (outs R600_Reg128:$dst),
> + (ins i32imm:$src),
> + "input_perspective $src : dst",
> + [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>;
> +
> +
> +
> +def INTERP_XY : InstR600 <0xD6,
> + (outs R600_Reg32:$dst),
> + (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
> + "INTERP_XY dst",
> + [], AnyALU>
> +{
> + let FlagOperandIdx = 3;
> +}
> +
> +def INTERP_ZW : InstR600 <0xD7,
> + (outs R600_Reg32:$dst),
> + (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
> + "INTERP_ZW dst",
> + [], AnyALU>
> +{
> + let FlagOperandIdx = 3;
> +}
> +
> +def INTERP_LOAD_P0 : InstR600 <0xE0,
> + (outs R600_Reg32:$dst),
> + (ins R600_Reg32:$src, i32imm:$flags),
> + "INTERP_LOAD_P0 dst",
> + [], AnyALU>
> +{
> + let FlagOperandIdx = 2;
> +}
>
> let Predicates = [isR600toCayman] in {
>
> diff --git a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td
> index 98af358..3b62f0a 100644
> --- a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td
> +++ b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td
> @@ -13,6 +13,16 @@
>
> let TargetPrefix = "R600", isTarget = 1 in {
> def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
> + def int_R600_load_input_perspective :
> + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
> + def int_R600_load_input_constant :
> + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
> + def int_R600_load_input_linear :
> + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
> + def int_R600_load_input_position :
> + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
> + def int_R600_load_input_face :
> + Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
> }
>
> let TargetPrefix = "r600", isTarget = 1 in {
> diff --git a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td
> index 8efa29b..00877ca 100644
> --- a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td
> +++ b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td
> @@ -13,4 +13,14 @@
>
> let TargetPrefix = "R600", isTarget = 1 in {
> def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
> + def int_R600_load_input_perspective :
> + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
> + def int_R600_load_input_constant :
> + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
> + def int_R600_load_input_linear :
> + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
> + def int_R600_load_input_position :
> + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
> + def int_R600_load_input_face :
> + Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
> }
> diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
> index 48443fb..a31848e 100644
> --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
> +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
> @@ -12,5 +12,22 @@
> using namespace llvm;
>
> R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
> - : MachineFunctionInfo()
> + : MachineFunctionInfo(),
> + HasLinearInterpolation(false),
> + HasPerspectiveInterpolation(false)
> { }
> +
> +unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const
> +{
> + assert(HasPerspectiveInterpolation);
> + return 0;
> +}
> +
> +unsigned R600MachineFunctionInfo::GetIJLinearIndex() const
> +{
> + assert(HasLinearInterpolation);
> + if (HasPerspectiveInterpolation)
> + return 1;
> + else
> + return 0;
> +}
> diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
> index 948e192..68211b2 100644
> --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
> +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
> @@ -25,6 +25,11 @@ class R600MachineFunctionInfo : public MachineFunctionInfo {
> public:
> R600MachineFunctionInfo(const MachineFunction &MF);
> std::vector<unsigned> ReservedRegs;
> + bool HasLinearInterpolation;
> + bool HasPerspectiveInterpolation;
> +
> + unsigned GetIJLinearIndex() const;
> + unsigned GetIJPerspectiveIndex() const;
>
> };
>
> --
> 1.7.11.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list