[Mesa-dev] [PATCH 1/2] R600/SI: add Gather4 intrinsics (v2)

Tom Stellard tom at stellard.net
Mon Jun 16 08:45:42 PDT 2014


On Thu, Jun 12, 2014 at 02:11:10AM +0200, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> This adds a new type of intrinsic and SDNode: SampleRaw.
> All fields of the MIMG opcodes are exposed and can be set by Mesa,
> even DMASK. All GATHER4 variants are added and there are a lot of them.
> 
> v2: document DMASK behavior
> ---
>  lib/Target/R600/AMDGPUISelLowering.cpp | 24 +++++++++
>  lib/Target/R600/AMDGPUISelLowering.h   | 31 +++++++++++
>  lib/Target/R600/SIISelLowering.cpp     | 72 +++++++++++++++++++++++++
>  lib/Target/R600/SIISelLowering.h       |  2 +
>  lib/Target/R600/SIInstrInfo.td         | 91 ++++++++++++++++++++++++++++++++
>  lib/Target/R600/SIInstructions.td      | 96 +++++++++++++++++++++++++---------
>  lib/Target/R600/SIIntrinsics.td        | 48 +++++++++++++++++
>  7 files changed, 340 insertions(+), 24 deletions(-)
> 
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> index 849f169..359161c 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -1542,6 +1542,30 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
>    NODE_NAME_CASE(SAMPLEB)
>    NODE_NAME_CASE(SAMPLED)
>    NODE_NAME_CASE(SAMPLEL)
> +  NODE_NAME_CASE(GATHER4)
> +  NODE_NAME_CASE(GATHER4_CL)
> +  NODE_NAME_CASE(GATHER4_L)
> +  NODE_NAME_CASE(GATHER4_B)
> +  NODE_NAME_CASE(GATHER4_B_CL)
> +  NODE_NAME_CASE(GATHER4_LZ)
> +  NODE_NAME_CASE(GATHER4_C)
> +  NODE_NAME_CASE(GATHER4_C_CL)
> +  NODE_NAME_CASE(GATHER4_C_L)
> +  NODE_NAME_CASE(GATHER4_C_B)
> +  NODE_NAME_CASE(GATHER4_C_B_CL)
> +  NODE_NAME_CASE(GATHER4_C_LZ)
> +  NODE_NAME_CASE(GATHER4_O)
> +  NODE_NAME_CASE(GATHER4_CL_O)
> +  NODE_NAME_CASE(GATHER4_L_O)
> +  NODE_NAME_CASE(GATHER4_B_O)
> +  NODE_NAME_CASE(GATHER4_B_CL_O)
> +  NODE_NAME_CASE(GATHER4_LZ_O)
> +  NODE_NAME_CASE(GATHER4_C_O)
> +  NODE_NAME_CASE(GATHER4_C_CL_O)
> +  NODE_NAME_CASE(GATHER4_C_L_O)
> +  NODE_NAME_CASE(GATHER4_C_B_O)
> +  NODE_NAME_CASE(GATHER4_C_B_CL_O)
> +  NODE_NAME_CASE(GATHER4_C_LZ_O)

You don't need to add new SDNodes for all these instructions; you can just use
the intrinsic directly in the pattern.

The only reason to add SDNodes is if there are optimizations or special lowering
we can do for these instructions.

>    NODE_NAME_CASE(STORE_MSKOR)
>    NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
>    }
> diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
> index d5d821d..a9af195 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.h
> +++ b/lib/Target/R600/AMDGPUISelLowering.h
> @@ -203,6 +203,37 @@ enum {
>    SAMPLEB,
>    SAMPLED,
>    SAMPLEL,
> +
> +  // Gather4 opcodes
> +  GATHER4,
> +  GATHER4_CL,
> +  GATHER4_L,
> +  GATHER4_B,
> +  GATHER4_B_CL,
> +  GATHER4_LZ,
> +
> +  GATHER4_C,
> +  GATHER4_C_CL,
> +  GATHER4_C_L,
> +  GATHER4_C_B,
> +  GATHER4_C_B_CL,
> +  GATHER4_C_LZ,
> +
> +  GATHER4_O,
> +  GATHER4_CL_O,
> +  GATHER4_L_O,
> +  GATHER4_B_O,
> +  GATHER4_B_CL_O,
> +  GATHER4_LZ_O,
> +
> +  GATHER4_C_O,
> +  GATHER4_C_CL_O,
> +  GATHER4_C_L_O,
> +  GATHER4_C_B_O,
> +  GATHER4_C_B_CL_O,
> +  GATHER4_C_LZ_O,
> +
> +  // Memory opcodes
>    FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
>    STORE_MSKOR,
>    LOAD_CONSTANT,
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index 1a861d4..909255d 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -688,6 +688,59 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
>                           Op.getOperand(1),
>                           Op.getOperand(2),
>                           Op.getOperand(3));
> +
> +    // Gather4 intrinsics
> +    case AMDGPUIntrinsic::SI_gather4:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_cl:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_CL, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_l:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_L, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_b:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_b_cl:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_CL, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_lz:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_LZ, Op, DAG);
> +
> +    case AMDGPUIntrinsic::SI_gather4_c:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_c_cl:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_CL, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_c_l:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_L, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_c_b:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_c_b_cl:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_CL, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_c_lz:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_LZ, Op, DAG);
> +
> +    case AMDGPUIntrinsic::SI_gather4_o:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_O, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_cl_o:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_CL_O, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_l_o:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_L_O, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_b_o:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_O, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_b_cl_o:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_CL_O, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_lz_o:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_LZ_O, Op, DAG);
> +
> +    case AMDGPUIntrinsic::SI_gather4_c_o:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_O, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_c_cl_o:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_CL_O, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_c_l_o:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_L_O, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_c_b_o:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_O, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_c_b_cl_o:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_CL_O, Op, DAG);
> +    case AMDGPUIntrinsic::SI_gather4_c_lz_o:
> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_LZ_O, Op, DAG);
>      }
>    }
>  
> @@ -876,6 +929,25 @@ SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
>                       Op.getOperand(4));
>  }
>  
> +SDValue SITargetLowering::LowerSampleRawIntrinsic(unsigned Opcode,
> +                                                  const SDValue &Op,
> +                                                  SelectionDAG &DAG) const {
> +  SDValue Ops[] = {
> +    Op.getOperand(1),
> +    Op.getOperand(2),
> +    Op.getOperand(3),
> +    Op.getOperand(4),
> +    Op.getOperand(5),
> +    Op.getOperand(6),
> +    Op.getOperand(7),
> +    Op.getOperand(8),
> +    Op.getOperand(9),
> +    Op.getOperand(10),
> +    Op.getOperand(11)
> +  };
> +  return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Ops);
> +}
> +
>  SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
>    if (Op.getValueType() != MVT::i64)
>      return SDValue();
> diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
> index c6eaa81..b48da3b 100644
> --- a/lib/Target/R600/SIISelLowering.h
> +++ b/lib/Target/R600/SIISelLowering.h
> @@ -25,6 +25,8 @@ class SITargetLowering : public AMDGPUTargetLowering {
>                           SDValue Chain, unsigned Offset, bool Signed) const;
>    SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
>                                 SelectionDAG &DAG) const;
> +  SDValue LowerSampleRawIntrinsic(unsigned Opcode, const SDValue &Op,
> +                                  SelectionDAG &DAG) const;
>    SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
>    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
>    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 3368d49..23a7ca3 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -57,6 +57,50 @@ def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
>  def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
>  def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
>  
> +class SDSampleRaw<string opcode> : SDNode <opcode,
> +  SDTypeProfile<1, 11,
> +    [SDTCisVT<0, v4f32>, // vdata(VGPR)
> +     SDTCisVT<2, v32i8>, // rsrc(SGPR)
> +     SDTCisVT<3, v4i32>, // sampler(SGPR)
> +     SDTCisVT<4, i32>,   // dmask(imm)
> +     SDTCisVT<5, i32>,   // unorm(imm)
> +     SDTCisVT<6, i32>,   // r128(imm)
> +     SDTCisVT<7, i32>,   // da(imm)
> +     SDTCisVT<8, i32>,   // glc(imm)
> +     SDTCisVT<9, i32>,   // slc(imm)
> +     SDTCisVT<10, i32>,   // tfe(imm)
> +     SDTCisVT<11, i32>   // lwe(imm)
> +    ]>
> +>;
> +
> +def SIgather4 : SDSampleRaw<"AMDGPUISD::GATHER4">;
> +def SIgather4_cl : SDSampleRaw<"AMDGPUISD::GATHER4_CL">;
> +def SIgather4_l : SDSampleRaw<"AMDGPUISD::GATHER4_L">;
> +def SIgather4_b : SDSampleRaw<"AMDGPUISD::GATHER4_B">;
> +def SIgather4_b_cl : SDSampleRaw<"AMDGPUISD::GATHER4_B_CL">;
> +def SIgather4_lz : SDSampleRaw<"AMDGPUISD::GATHER4_LZ">;
> +
> +def SIgather4_c : SDSampleRaw<"AMDGPUISD::GATHER4_C">;
> +def SIgather4_c_cl : SDSampleRaw<"AMDGPUISD::GATHER4_C_CL">;
> +def SIgather4_c_l : SDSampleRaw<"AMDGPUISD::GATHER4_C_L">;
> +def SIgather4_c_b : SDSampleRaw<"AMDGPUISD::GATHER4_C_B">;
> +def SIgather4_c_b_cl : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_CL">;
> +def SIgather4_c_lz : SDSampleRaw<"AMDGPUISD::GATHER4_C_LZ">;
> +
> +def SIgather4_o : SDSampleRaw<"AMDGPUISD::GATHER4_O">;
> +def SIgather4_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_CL_O">;
> +def SIgather4_l_o : SDSampleRaw<"AMDGPUISD::GATHER4_L_O">;
> +def SIgather4_b_o : SDSampleRaw<"AMDGPUISD::GATHER4_B_O">;
> +def SIgather4_b_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_B_CL_O">;
> +def SIgather4_lz_o : SDSampleRaw<"AMDGPUISD::GATHER4_LZ_O">;
> +
> +def SIgather4_c_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_O">;
> +def SIgather4_c_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_CL_O">;
> +def SIgather4_c_l_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_L_O">;
> +def SIgather4_c_b_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_O">;
> +def SIgather4_c_b_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_CL_O">;
> +def SIgather4_c_lz_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_LZ_O">;
> +
>  // Transformation function, extract the lower 32bit of a 64bit immediate
>  def LO32 : SDNodeXForm<imm, [{
>    return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32);
> @@ -658,6 +702,53 @@ multiclass MIMG_Sampler <bits<7> op, string asm> {
>    defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>;
>  }
>  
> +class MIMG_Gather_Helper <bits<7> op, string asm,
> +                          RegisterClass dst_rc,
> +                          RegisterClass src_rc> : MIMG <
> +  op,
> +  (outs dst_rc:$vdata),
> +  (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
> +       i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
> +       SReg_256:$srsrc, SReg_128:$ssamp),
> +  asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
> +     #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
> +  []> {
> +  let mayLoad = 1;
> +  let mayStore = 0;
> +
> +  // DMASK was repurposed for GATHER4. 4 components are always
> +  // returned and DMASK works like a swizzle - it selects
> +  // the component to fetch. The only useful DMASK values are
> +  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
> +  // (red,red,red,red) etc.) The ISA document doesn't mention
> +  // this.
> +  // Therefore, disable all code which updates DMASK by setting these two:
> +  let MIMG = 0;
> +  let hasPostISelHook = 0;
> +}
> +
> +multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
> +                                    RegisterClass dst_rc,
> +                                    int channels> {
> +  def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_32>,
> +            MIMG_Mask<asm#"_V1", channels>;
> +  def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64>,
> +            MIMG_Mask<asm#"_V2", channels>;
> +  def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128>,
> +            MIMG_Mask<asm#"_V4", channels>;
> +  def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256>,
> +            MIMG_Mask<asm#"_V8", channels>;
> +  def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512>,
> +            MIMG_Mask<asm#"_V16", channels>;
> +}
> +
> +multiclass MIMG_Gather <bits<7> op, string asm> {
> +  defm _V1 : MIMG_Gather_Src_Helper<op, asm, VReg_32, 1>;
> +  defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2>;
> +  defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3>;
> +  defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4>;
> +}
> +
>  //===----------------------------------------------------------------------===//
>  // Vector instruction mappings
>  //===----------------------------------------------------------------------===//
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index d4a7c5c..d65d88b 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -887,30 +887,30 @@ defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">;
>  //def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
>  //def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
>  //def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
> -//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>;
> -//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>;
> -//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>;
> -//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>;
> -//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>;
> -//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>;
> -//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>;
> -//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>;
> -//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>;
> -//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>;
> -//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>;
> -//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>;
> -//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>;
> -//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>;
> -//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>;
> -//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>;
> -//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>;
> -//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>;
> -//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>;
> -//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>;
> -//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>;
> -//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>;
> -//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>;
> -//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>;
> +defm IMAGE_GATHER4          : MIMG_Gather <0x00000040, "IMAGE_GATHER4">;
> +defm IMAGE_GATHER4_CL       : MIMG_Gather <0x00000041, "IMAGE_GATHER4_CL">;
> +defm IMAGE_GATHER4_L        : MIMG_Gather <0x00000044, "IMAGE_GATHER4_L">;
> +defm IMAGE_GATHER4_B        : MIMG_Gather <0x00000045, "IMAGE_GATHER4_B">;
> +defm IMAGE_GATHER4_B_CL     : MIMG_Gather <0x00000046, "IMAGE_GATHER4_B_CL">;
> +defm IMAGE_GATHER4_LZ       : MIMG_Gather <0x00000047, "IMAGE_GATHER4_LZ">;
> +defm IMAGE_GATHER4_C        : MIMG_Gather <0x00000048, "IMAGE_GATHER4_C">;
> +defm IMAGE_GATHER4_C_CL     : MIMG_Gather <0x00000049, "IMAGE_GATHER4_C_CL">;
> +defm IMAGE_GATHER4_C_L      : MIMG_Gather <0x0000004c, "IMAGE_GATHER4_C_L">;
> +defm IMAGE_GATHER4_C_B      : MIMG_Gather <0x0000004d, "IMAGE_GATHER4_C_B">;
> +defm IMAGE_GATHER4_C_B_CL   : MIMG_Gather <0x0000004e, "IMAGE_GATHER4_C_B_CL">;
> +defm IMAGE_GATHER4_C_LZ     : MIMG_Gather <0x0000004f, "IMAGE_GATHER4_C_LZ">;
> +defm IMAGE_GATHER4_O        : MIMG_Gather <0x00000050, "IMAGE_GATHER4_O">;
> +defm IMAGE_GATHER4_CL_O     : MIMG_Gather <0x00000051, "IMAGE_GATHER4_CL_O">;
> +defm IMAGE_GATHER4_L_O      : MIMG_Gather <0x00000054, "IMAGE_GATHER4_L_O">;
> +defm IMAGE_GATHER4_B_O      : MIMG_Gather <0x00000055, "IMAGE_GATHER4_B_O">;
> +defm IMAGE_GATHER4_B_CL_O   : MIMG_Gather <0x00000056, "IMAGE_GATHER4_B_CL_O">;
> +defm IMAGE_GATHER4_LZ_O     : MIMG_Gather <0x00000057, "IMAGE_GATHER4_LZ_O">;
> +defm IMAGE_GATHER4_C_O      : MIMG_Gather <0x00000058, "IMAGE_GATHER4_C_O">;
> +defm IMAGE_GATHER4_C_CL_O   : MIMG_Gather <0x00000059, "IMAGE_GATHER4_C_CL_O">;
> +defm IMAGE_GATHER4_C_L_O    : MIMG_Gather <0x0000005c, "IMAGE_GATHER4_C_L_O">;
> +defm IMAGE_GATHER4_C_B_O    : MIMG_Gather <0x0000005d, "IMAGE_GATHER4_C_B_O">;
> +defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "IMAGE_GATHER4_C_B_CL_O">;
> +defm IMAGE_GATHER4_C_LZ_O   : MIMG_Gather <0x0000005f, "IMAGE_GATHER4_C_LZ_O">;
>  //def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>;
>  //def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
>  //def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
> @@ -1655,6 +1655,54 @@ def : SextInReg <i16, 16>;
>  /********** Image sampling patterns **********/
>  /********** ======================= **********/
>  
> +class SampleRawPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
> +  (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm,
> +        i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
> +  (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da),
> +          (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc),
> +          $addr, $rsrc, $sampler)
> +>;
> +
> +// Gather4 patterns. Only the variants which make sense are defined.
> +def : SampleRawPattern<SIgather4,           IMAGE_GATHER4_V4_V2,        v2i32>;
> +def : SampleRawPattern<SIgather4,           IMAGE_GATHER4_V4_V4,        v4i32>;
> +def : SampleRawPattern<SIgather4_cl,        IMAGE_GATHER4_CL_V4_V4,     v4i32>;
> +def : SampleRawPattern<SIgather4_l,         IMAGE_GATHER4_L_V4_V4,      v4i32>;
> +def : SampleRawPattern<SIgather4_b,         IMAGE_GATHER4_B_V4_V4,      v4i32>;
> +def : SampleRawPattern<SIgather4_b_cl,      IMAGE_GATHER4_B_CL_V4_V4,   v4i32>;
> +def : SampleRawPattern<SIgather4_b_cl,      IMAGE_GATHER4_B_CL_V4_V8,   v8i32>;
> +def : SampleRawPattern<SIgather4_lz,        IMAGE_GATHER4_LZ_V4_V2,     v2i32>;
> +def : SampleRawPattern<SIgather4_lz,        IMAGE_GATHER4_LZ_V4_V4,     v4i32>;
> +
> +def : SampleRawPattern<SIgather4_c,         IMAGE_GATHER4_C_V4_V4,      v4i32>;
> +def : SampleRawPattern<SIgather4_c_cl,      IMAGE_GATHER4_C_CL_V4_V4,   v4i32>;
> +def : SampleRawPattern<SIgather4_c_cl,      IMAGE_GATHER4_C_CL_V4_V8,   v8i32>;
> +def : SampleRawPattern<SIgather4_c_l,       IMAGE_GATHER4_C_L_V4_V4,    v4i32>;
> +def : SampleRawPattern<SIgather4_c_l,       IMAGE_GATHER4_C_L_V4_V8,    v8i32>;
> +def : SampleRawPattern<SIgather4_c_b,       IMAGE_GATHER4_C_B_V4_V4,    v4i32>;
> +def : SampleRawPattern<SIgather4_c_b,       IMAGE_GATHER4_C_B_V4_V8,    v8i32>;
> +def : SampleRawPattern<SIgather4_c_b_cl,    IMAGE_GATHER4_C_B_CL_V4_V8, v8i32>;
> +def : SampleRawPattern<SIgather4_c_lz,      IMAGE_GATHER4_C_LZ_V4_V4,   v4i32>;
> +
> +def : SampleRawPattern<SIgather4_o,         IMAGE_GATHER4_O_V4_V4,      v4i32>;
> +def : SampleRawPattern<SIgather4_cl_o,      IMAGE_GATHER4_CL_O_V4_V4,   v4i32>;
> +def : SampleRawPattern<SIgather4_cl_o,      IMAGE_GATHER4_CL_O_V4_V8,   v8i32>;
> +def : SampleRawPattern<SIgather4_l_o,       IMAGE_GATHER4_L_O_V4_V4,    v4i32>;
> +def : SampleRawPattern<SIgather4_l_o,       IMAGE_GATHER4_L_O_V4_V8,    v8i32>;
> +def : SampleRawPattern<SIgather4_b_o,       IMAGE_GATHER4_B_O_V4_V4,    v4i32>;
> +def : SampleRawPattern<SIgather4_b_o,       IMAGE_GATHER4_B_O_V4_V8,    v8i32>;
> +def : SampleRawPattern<SIgather4_b_cl_o,    IMAGE_GATHER4_B_CL_O_V4_V8, v8i32>;
> +def : SampleRawPattern<SIgather4_lz_o,      IMAGE_GATHER4_LZ_O_V4_V4,   v4i32>;
> +
> +def : SampleRawPattern<SIgather4_c_o,       IMAGE_GATHER4_C_O_V4_V4,    v4i32>;
> +def : SampleRawPattern<SIgather4_c_o,       IMAGE_GATHER4_C_O_V4_V8,    v8i32>;
> +def : SampleRawPattern<SIgather4_c_cl_o,    IMAGE_GATHER4_C_CL_O_V4_V8, v8i32>;
> +def : SampleRawPattern<SIgather4_c_l_o,     IMAGE_GATHER4_C_L_O_V4_V8,  v8i32>;
> +def : SampleRawPattern<SIgather4_c_b_o,     IMAGE_GATHER4_C_B_O_V4_V8,  v8i32>;
> +def : SampleRawPattern<SIgather4_c_b_cl_o,  IMAGE_GATHER4_C_B_CL_O_V4_V8, v8i32>;
> +def : SampleRawPattern<SIgather4_c_lz_o,    IMAGE_GATHER4_C_LZ_O_V4_V4, v4i32>;
> +def : SampleRawPattern<SIgather4_c_lz_o,    IMAGE_GATHER4_C_LZ_O_V4_V8, v8i32>;
> +
>  /* SIsample for simple 1D texture lookup */
>  def : Pat <
>    (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
> diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
> index 00e32c0..9d85f17 100644
> --- a/lib/Target/R600/SIIntrinsics.td
> +++ b/lib/Target/R600/SIIntrinsics.td
> @@ -56,11 +56,59 @@ let TargetPrefix = "SI", isTarget = 1 in {
>  
>    class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
>  
> +  // Fully-flexible SAMPLE instruction.
> +  class SampleRaw : Intrinsic <
> +    [llvm_v4f32_ty],    // vdata(VGPR)
> +    [llvm_anyvector_ty, // vaddr(VGPR)
> +     llvm_v32i8_ty,     // rsrc(SGPR)
> +     llvm_v16i8_ty,     // sampler(SGPR)
> +     llvm_i32_ty,       // dmask(imm)
> +     llvm_i32_ty,       // unorm(imm)
> +     llvm_i32_ty,       // r128(imm)
> +     llvm_i32_ty,       // da(imm)
> +     llvm_i32_ty,       // glc(imm)
> +     llvm_i32_ty,       // slc(imm)
> +     llvm_i32_ty,       // tfe(imm)
> +     llvm_i32_ty],      // lwe(imm)
> +    [IntrNoMem]>;
> +
>    def int_SI_sample : Sample;
>    def int_SI_sampleb : Sample;
>    def int_SI_sampled : Sample;
>    def int_SI_samplel : Sample;
>  
> +  // Basic gather4
> +  def int_SI_gather4 : SampleRaw;
> +  def int_SI_gather4_cl : SampleRaw;
> +  def int_SI_gather4_l : SampleRaw;
> +  def int_SI_gather4_b : SampleRaw;
> +  def int_SI_gather4_b_cl : SampleRaw;
> +  def int_SI_gather4_lz : SampleRaw;
> +
> +  // Gather4 with comparison
> +  def int_SI_gather4_c : SampleRaw;
> +  def int_SI_gather4_c_cl : SampleRaw;
> +  def int_SI_gather4_c_l : SampleRaw;
> +  def int_SI_gather4_c_b : SampleRaw;
> +  def int_SI_gather4_c_b_cl : SampleRaw;
> +  def int_SI_gather4_c_lz : SampleRaw;
> +
> +  // Gather4 with offsets
> +  def int_SI_gather4_o : SampleRaw;
> +  def int_SI_gather4_cl_o : SampleRaw;
> +  def int_SI_gather4_l_o : SampleRaw;
> +  def int_SI_gather4_b_o : SampleRaw;
> +  def int_SI_gather4_b_cl_o : SampleRaw;
> +  def int_SI_gather4_lz_o : SampleRaw;
> +
> +  // Gather4 with comparison and offsets
> +  def int_SI_gather4_c_o : SampleRaw;
> +  def int_SI_gather4_c_cl_o : SampleRaw;
> +  def int_SI_gather4_c_l_o : SampleRaw;
> +  def int_SI_gather4_c_b_o : SampleRaw;
> +  def int_SI_gather4_c_b_cl_o : SampleRaw;
> +  def int_SI_gather4_c_lz_o : SampleRaw;
> +
>    def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
>  
>    def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
> -- 
> 1.9.1
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list