[Mesa-dev] [PATCH] R600/SI: expose TBUFFER_STORE_FORMAT_* for OpenGL transform feedback
Tom Stellard
tom at stellard.net
Thu Sep 5 08:44:57 PDT 2013
On Mon, Sep 02, 2013 at 09:07:18PM +0200, Marek Olšák wrote:
> For _XYZ, the type of VDATA is v4i32, because v3i32 doesn't exist.
>
> The ADDR64 bit is not exposed. A simpler intrinsic that doesn't take
> a resource descriptor might be nicer.
>
> The maximum number of input SGPRs is bumped to 17.
>
> Signed-off-by: Marek Olšák <marek.olsak at amd.com>
> ---
> lib/Target/R600/AMDGPUCallingConv.td | 3 ++-
> lib/Target/R600/AMDGPUISelLowering.cpp | 1 +
> lib/Target/R600/AMDGPUISelLowering.h | 1 +
> lib/Target/R600/SIISelLowering.cpp | 39 ++++++++++++++++++++++++++++++++++
> lib/Target/R600/SIInstrInfo.td | 27 +++++++++++++++++++++++
> lib/Target/R600/SIInstructions.td | 29 +++++++++++++++++++++----
> lib/Target/R600/SIIntrinsics.td | 18 ++++++++++++++++
> 7 files changed, 113 insertions(+), 5 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td
> index 84d3118..d26be32 100644
> --- a/lib/Target/R600/AMDGPUCallingConv.td
> +++ b/lib/Target/R600/AMDGPUCallingConv.td
> @@ -19,7 +19,8 @@ def CC_SI : CallingConv<[
>
> CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
> SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
> - SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15
> + SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
> + SGPR16
Why is this necessary? Are we using all 16 user SGPRs now?
> ]>>>,
>
> CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> index 1237323..30d9503 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -718,5 +718,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
> NODE_NAME_CASE(SAMPLED)
> NODE_NAME_CASE(SAMPLEL)
> NODE_NAME_CASE(STORE_MSKOR)
> + NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
> }
> }
> diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
> index 75ac4c2..8a68356 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.h
> +++ b/lib/Target/R600/AMDGPUISelLowering.h
> @@ -160,6 +160,7 @@ enum {
> FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
> STORE_MSKOR,
> LOAD_CONSTANT,
> + TBUFFER_STORE_FORMAT,
> LAST_AMDGPU_ISD_NUMBER
> };
>
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index f196059..6fa0c85 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -86,6 +86,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
> setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom);
> setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
>
> + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
> +
> setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
>
> setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
> @@ -462,6 +464,43 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
> Op.getOperand(3));
> }
> }
> +
> + case ISD::INTRINSIC_VOID:
> + SDValue Chain = Op.getOperand(0);
> + unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
> +
> + switch (IntrinsicID) {
> + case AMDGPUIntrinsic::SI_tbuffer_store: {
> + SDLoc DL(Op);
> + SDValue Ops [] = {
> + Chain,
> + ResourceDescriptorToi128(Op.getOperand(2), DAG),
> + Op.getOperand(3),
> + Op.getOperand(4),
> + Op.getOperand(5),
> + Op.getOperand(6),
> + Op.getOperand(7),
> + Op.getOperand(8),
> + Op.getOperand(9),
> + Op.getOperand(10),
> + Op.getOperand(11),
> + Op.getOperand(12),
> + Op.getOperand(13),
> + Op.getOperand(14)
> + };
> + EVT VT = Op.getOperand(3).getValueType();
> +
> + MachineMemOperand *MMO = MF.getMachineMemOperand(
> + MachinePointerInfo(),
> + MachineMemOperand::MOStore,
> + VT.getSizeInBits() / 8, 4);
> + return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
> + Op->getVTList(), Ops,
> + sizeof(Ops)/sizeof(Ops[0]), VT, MMO);
> + }
> + default:
> + break;
> + }
> }
> return SDValue();
> }
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index ecc4718..c902feb 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -21,6 +21,25 @@ def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
> [SDNPMayLoad, SDNPMemOperand]
> >;
>
> +def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
> + SDTypeProfile<0, 13,
> + [SDTCisVT<0, i128>, // rsrc(SGPR)
> + SDTCisVT<1, iAny>, // vdata(VGPR)
> + SDTCisVT<2, i32>, // num_channels(imm)
> + SDTCisVT<3, i32>, // vaddr(VGPR)
> + SDTCisVT<4, i32>, // soffset(SGPR)
> + SDTCisVT<5, i32>, // inst_offset(imm)
> + SDTCisVT<6, i32>, // dfmt(imm)
> + SDTCisVT<7, i32>, // nfmt(imm)
> + SDTCisVT<8, i32>, // offen(imm)
> + SDTCisVT<9, i32>, // idxen(imm)
> + SDTCisVT<10, i32>, // glc(imm)
> + SDTCisVT<11, i32>, // slc(imm)
> + SDTCisVT<12, i32> // tfe(imm)
> + ]>,
> + [SDNPMayStore, SDNPMemOperand, SDNPHasChain]
> +>;
> +
> def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
> SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>,
> SDTCisVT<3, i32>]>
> @@ -65,6 +84,14 @@ def IMM8bitDWORD : ImmLeaf <
> }]>
> >;
>
> +def as_i1imm : SDNodeXForm<imm, [{
> + return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i1);
> +}]>;
> +
> +def as_i8imm : SDNodeXForm<imm, [{
> + return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i8);
> +}]>;
> +
> def as_i16imm : SDNodeXForm<imm, [{
> return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i16);
> }]>;
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 136f69c..cd59c80 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -475,10 +475,10 @@ def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
> //def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
> //def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
> def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
> -//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>;
> -//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>;
> -//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
> -//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
> +def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "TBUFFER_STORE_FORMAT_X", VReg_32>;
> +def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "TBUFFER_STORE_FORMAT_XY", VReg_64>;
> +def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", VReg_128>;
> +def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", VReg_128>;
>
> let mayLoad = 1 in {
>
> @@ -1873,6 +1873,27 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;
> defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
> defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;
>
> +//===----------------------------------------------------------------------===//
> +// MTBUF Patterns
> +//===----------------------------------------------------------------------===//
> +
> +// TBUFFER_STORE_FORMAT_*, addr64=0
> +class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat<
> + (SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
> + i32:$soffset, imm:$inst_offset, imm:$dfmt,
> + imm:$nfmt, imm:$offen, imm:$idxen,
> + imm:$glc, imm:$slc, imm:$tfe),
> + (opcode
> + $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
> + (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
> + (as_i1imm $slc), (as_i1imm $tfe), $soffset)
> +>;
> +
> +def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>;
> +def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
> +def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
> +def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
> +
> /********** ====================== **********/
> /********** Indirect adressing **********/
> /********** ====================== **********/
> diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
> index d6e26ad..7fcc964 100644
> --- a/lib/Target/R600/SIIntrinsics.td
> +++ b/lib/Target/R600/SIIntrinsics.td
> @@ -20,6 +20,24 @@ let TargetPrefix = "SI", isTarget = 1 in {
> def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
> def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
>
> + // Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed
> + def int_SI_tbuffer_store : Intrinsic <
> + [],
> + [llvm_anyint_ty, // rsrc(SGPR)
> + llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32
> + llvm_i32_ty, // num_channels(imm), selects opcode suffix: 1=X, 2=XY, 3=XYZ, 4=XYZW
> + llvm_i32_ty, // vaddr(VGPR)
> + llvm_i32_ty, // soffset(SGPR)
> + llvm_i32_ty, // inst_offset(imm)
> + llvm_i32_ty, // dfmt(imm)
> + llvm_i32_ty, // nfmt(imm)
> + llvm_i32_ty, // offen(imm)
> + llvm_i32_ty, // idxen(imm)
> + llvm_i32_ty, // glc(imm)
> + llvm_i32_ty, // slc(imm)
> + llvm_i32_ty], // tfe(imm)
> + []>;
> +
> class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
>
> def int_SI_sample : Sample;
> --
> 1.8.1.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list