[Mesa-dev] [PATCH 1/2] radeon/llvm: use specialised R600.store.pixel.* for fragment shader
Tom Stellard
tom at stellard.net
Tue Oct 9 06:50:45 PDT 2012
On Sun, Oct 07, 2012 at 09:11:15PM +0200, Vincent Lejeune wrote:
> ---
> src/gallium/drivers/radeon/AMDGPUISelLowering.cpp | 1 +
> src/gallium/drivers/radeon/AMDGPUISelLowering.h | 1 +
> .../radeon/MCTargetDesc/R600MCCodeEmitter.cpp | 11 +-
> src/gallium/drivers/radeon/R600ISelLowering.cpp | 68 ++++++++++++
> src/gallium/drivers/radeon/R600Instructions.td | 114 +++++++++++++++++++++
> .../drivers/radeon/R600IntrinsicsNoOpenCL.td | 6 ++
> src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td | 6 ++
> .../drivers/radeon/R600MachineFunctionInfo.cpp | 4 +-
> .../drivers/radeon/R600MachineFunctionInfo.h | 1 +
> 9 files changed, 210 insertions(+), 2 deletions(-)
>
A few coding style issues I think I missed in the first review, that
I've noted below. With those changes:
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
> index 04dadc3..5cb4d87 100644
> --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
> +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
> @@ -348,5 +348,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
> NODE_NAME_CASE(URECIP)
> NODE_NAME_CASE(INTERP)
> NODE_NAME_CASE(INTERP_P0)
> + NODE_NAME_CASE(EXPORT)
> }
> }
> diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h
> index 2d8ed82..58d2287 100644
> --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h
> +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h
> @@ -121,6 +121,7 @@ enum
> URECIP,
> INTERP,
> INTERP_P0,
> + EXPORT,
> LAST_AMDGPU_ISD_NUMBER
> };
>
> diff --git a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp b/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp
> index a11f482..b42ee3c 100644
> --- a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp
> +++ b/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp
> @@ -108,7 +108,8 @@ enum InstrTypes {
> INSTR_TEX,
> INSTR_FC,
> INSTR_NATIVE,
> - INSTR_VTX
> + INSTR_VTX,
> + INSTR_EXPORT
> };
>
> enum FCInstr {
> @@ -183,6 +184,14 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
> Emit(InstWord2, OS);
> break;
> }
> + case AMDGPU::EG_Export:
> + case AMDGPU::R600_Export:
> + {
> + uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
> + EmitByte(INSTR_EXPORT, OS);
> + Emit(Inst, OS);
> + break;
> + }
>
> default:
> EmitALUInstr(MI, Fixups, OS);
> diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp
> index 5dd2f53..128d14a 100644
> --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
> +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
> @@ -264,6 +264,26 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
>
> return BB;
> }
> + case AMDGPU::EG_Export:
> + case AMDGPU::R600_Export:
> + {
> + bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
> + if (!EOP)
> + return BB;
> + unsigned cf_inst = (MI->getOpcode() == AMDGPU::EG_Export)? 84 : 40;
cf_inst Needs to be capitalized and no underscore cf_inst -> CfInst
> + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
> + .addOperand(MI->getOperand(0))
> + .addOperand(MI->getOperand(1))
> + .addOperand(MI->getOperand(2))
> + .addOperand(MI->getOperand(3))
> + .addOperand(MI->getOperand(4))
> + .addOperand(MI->getOperand(5))
> + .addOperand(MI->getOperand(6))
> + .addImm(cf_inst)
> + .addImm(1);
> + break;
> + }
> +
> }
>
> MI->eraseFromParent();
> @@ -300,6 +320,54 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
> }
> return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
> }
> + case AMDGPUIntrinsic::R600_store_pixel_color: {
> + MachineFunction &MF = DAG.getMachineFunction();
> + R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>();
> + int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
> + unsigned Slot = RegIndex / 4;
> +
> + SDNode **outputsMap = MFI->outputs;
> +
> + if (!outputsMap[Slot]) {
> + SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
> + Op.getDebugLoc(), MVT::v4f32,
> + DAG.getUNDEF(MVT::v4f32),
> + Op.getOperand(2),
> + DAG.getConstant(RegIndex % 4, MVT::i32));
> +
> + const SDValue Ops[8] = {Chain, Vector, DAG.getConstant(0, MVT::i32),
> + DAG.getConstant(Slot, MVT::i32), DAG.getConstant(0, MVT::i32),
> + DAG.getConstant(1, MVT::i32), DAG.getConstant(2, MVT::i32),
> + DAG.getConstant(3, MVT::i32)};
> +
> + SDValue Res = DAG.getNode(
> + AMDGPUISD::EXPORT,
> + Op.getDebugLoc(),
> + MVT::Other,
> + Ops, 8);
> + outputsMap[Slot] = Res.getNode();
> + return Res;
> + }
> +
> + SDNode *ExportInstruction = (SDNode *) outputsMap[Slot] ;
> + SDValue PreviousVector = ExportInstruction->getOperand(1);
> + SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
> + Op.getDebugLoc(), MVT::v4f32,
> + PreviousVector,
> + Op.getOperand(2),
> + DAG.getConstant(RegIndex % 4, MVT::i32));
> +
> + const SDValue Ops[8] = {ExportInstruction->getOperand(0), Vector, DAG.getConstant(0, MVT::i32),
> + DAG.getConstant(Slot, MVT::i32), DAG.getConstant(0, MVT::i32),
> + DAG.getConstant(1, MVT::i32), DAG.getConstant(2, MVT::i32),
> + DAG.getConstant(3, MVT::i32)};
> +
> + DAG.UpdateNodeOperands(ExportInstruction,
> + Ops, 8);
> +
> + return Chain;
> + }
> +
> // default for switch(IntrinsicID)
> default: break;
> }
> diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
> index 1689a2f..a5b4635 100644
> --- a/src/gallium/drivers/radeon/R600Instructions.td
> +++ b/src/gallium/drivers/radeon/R600Instructions.td
> @@ -74,7 +74,33 @@ class R600_ALU {
>
> def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
> (ops PRED_SEL_OFF)>;
> +
>
Extra whitespace here.
> +def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
> +
> +def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
> + [SDNPHasChain, SDNPSideEffect]>;
> +
> +multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
> + def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
> + (ExportInst
> + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
> + 0, 61, 0, 7, 7, 7, cf_inst, 0)
> + >;
> +
> + def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
> + (ExportInst
> + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
> + 0, 61, 7, 0, 7, 7, cf_inst, 0)
> + >;
> +
> + def : Pat<(EXPORT (v4f32 R600_Reg128:$src),
> + (i32 imm:$type), (i32 imm:$arraybase),
> + (i32 imm:$sw_x), (i32 imm:$sw_y), (i32 imm:$sw_z), (i32 imm:$sw_w)),
> + (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
> + imm:$sw_x, imm:$sw_y, imm:$sw_z, imm:$sw_w, cf_inst, 0)
> + >;
> +}
>
> class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
> InstrItinClass itin = AnyALU> :
> @@ -866,6 +892,50 @@ class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ie
>
> let Predicates = [isR600] in {
>
> + let isTerminator = 1, usesCustomInserter = 1 in {
> +
> + def R600_Export :
> + InstR600ISA<
> + (outs),
> + (ins R600_Reg128:$src, i32imm:$type, i32imm:$arraybase,
> + i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst,
> + i32imm:$eop),
> + !strconcat("EXPORT", " $src"),
> + []>{
> + bits<13> arraybase;
> + bits<2> type;
> + bits<7> src;
> +
> + bits<3> sw_x;
> + bits<3> sw_y;
> + bits<3> sw_z;
> + bits<3> sw_w;
> +
> + bits<1> eop;
> + bits<8> inst;
> +
> + let Inst{12-0} = arraybase;
> + let Inst{14-13} = type;
> + let Inst{21-15} = src;
> + let Inst{22} = 0; // RW_REL
> + let Inst{29-23} = 0; // INDEX_GPR
> + let Inst{31-30} = 3; // ELEM_SIZE
> + let Inst{34-32} = sw_x;
> + let Inst{37-35} = sw_y;
> + let Inst{40-38} = sw_z;
> + let Inst{43-41} = sw_w;
> + let Inst{52-49} = 1; // BURST_COUNT
> + let Inst{53} = 1; // VALID_PIXEL_MODE
> + let Inst{54} = eop;
> + let Inst{62-55} = inst;
> + let Inst{63} = 1; // BARRIER
> + }
> +
> + } // End isTerminator = 1, usesCustomInserter = 1
> +
> + defm : ExportPattern<R600_Export, 39>;
> +
> +
> def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
> def MULADD_r600 : MULADD_Common<0x10>;
> def CNDE_r600 : CNDE_Common<0x18>;
> @@ -945,6 +1015,50 @@ def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
>
> let Predicates = [isEGorCayman] in {
>
> + let isTerminator = 1, usesCustomInserter = 1 in {
> +
> + def EG_Export :
> + InstR600ISA<
> + (outs),
> + (ins R600_Reg128:$src, i32imm:$type, i32imm:$arraybase,
> + i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst,
> + i32imm:$eop),
> + !strconcat("EXPORT", " $src"),
> + []>{
> + bits<13> arraybase;
> + bits<2> type;
> + bits<7> src;
> +
> + bits<3> sw_x;
> + bits<3> sw_y;
> + bits<3> sw_z;
> + bits<3> sw_w;
> +
> + bits<1> eop;
> + bits<8> inst;
> +
> + let Inst{12-0} = arraybase;
> + let Inst{14-13} = type;
> + let Inst{21-15} = src;
> + let Inst{22} = 0; // RW_REL
> + let Inst{29-23} = 0; // INDEX_GPR
> + let Inst{31-30} = 3; // ELEM_SIZE
> + let Inst{34-32} = sw_x;
> + let Inst{37-35} = sw_y;
> + let Inst{40-38} = sw_z;
> + let Inst{43-41} = sw_w;
> + let Inst{51-48} = 1; // BURST_COUNT
> + let Inst{52} = 1; // VALID_PIXEL_MODE
> + let Inst{53} = eop;
> + let Inst{61-54} = inst;
> + let Inst{62} = 0; // MARK
> + let Inst{63} = 1; // BARRIER
> + }
> +
> + } // End isTerminator = 1, usesCustomInserter = 1
> +
> + defm : ExportPattern<EG_Export, 83>;
> +
> // BFE_UINT - bit_extract, an optimization for mask and shift
> // Src0 = Input
> // Src1 = Offset
> diff --git a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td
> index 3b62f0a..e6241d2 100644
> --- a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td
> +++ b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td
> @@ -23,6 +23,12 @@ let TargetPrefix = "R600", isTarget = 1 in {
> Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
> def int_R600_load_input_face :
> Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
> + def int_R600_store_pixel_color :
> + Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
> + def int_R600_store_pixel_depth :
> + Intrinsic<[], [llvm_float_ty], []>;
> + def int_R600_store_pixel_stencil :
> + Intrinsic<[], [llvm_float_ty], []>;
> }
>
> let TargetPrefix = "r600", isTarget = 1 in {
> diff --git a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td
> index 00877ca..664d76b 100644
> --- a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td
> +++ b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td
> @@ -23,4 +23,10 @@ let TargetPrefix = "R600", isTarget = 1 in {
> Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
> def int_R600_load_input_face :
> Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
> + def int_R600_store_pixel_color :
> + Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
> + def int_R600_store_pixel_depth :
> + Intrinsic<[], [llvm_float_ty], []>;
> + def int_R600_store_pixel_stencil :
> + Intrinsic<[], [llvm_float_ty], []>;
> }
> diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
> index a31848e..1314468 100644
> --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
> +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
> @@ -15,7 +15,9 @@ R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
> : MachineFunctionInfo(),
> HasLinearInterpolation(false),
> HasPerspectiveInterpolation(false)
> - { }
> + {
> + memset(outputs, 0, sizeof(outputs));
> + }
>
> unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const
> {
> diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
> index 68211b2..1fa5b23 100644
> --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
> +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
> @@ -25,6 +25,7 @@ class R600MachineFunctionInfo : public MachineFunctionInfo {
> public:
> R600MachineFunctionInfo(const MachineFunction &MF);
> std::vector<unsigned> ReservedRegs;
> + class SDNode *outputs[16];
You don't need to use the class keyword here. Also, this variable name
should be capitalized: Outputs
> bool HasLinearInterpolation;
> bool HasPerspectiveInterpolation;
>
> --
> 1.7.11.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list