[Mesa-dev] [PATCH 1/2] R600/SI: add Gather4 intrinsics (v2)
Marek Olšák
maraeo at gmail.com
Mon Jun 16 12:19:59 PDT 2014
Why are there SDNodes for the other "sample" intrinsics then?
Marek
On Mon, Jun 16, 2014 at 5:45 PM, Tom Stellard <tom at stellard.net> wrote:
> On Thu, Jun 12, 2014 at 02:11:10AM +0200, Marek Olšák wrote:
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> This adds a new type of intrinsic and SDNode: SampleRaw.
>> All fields of the MIMG opcodes are exposed and can be set by Mesa,
>> even DMASK. All GATHER4 variants are added and there are a lot of them.
>>
>> v2: document DMASK behavior
>> ---
>> lib/Target/R600/AMDGPUISelLowering.cpp | 24 +++++++++
>> lib/Target/R600/AMDGPUISelLowering.h | 31 +++++++++++
>> lib/Target/R600/SIISelLowering.cpp | 72 +++++++++++++++++++++++++
>> lib/Target/R600/SIISelLowering.h | 2 +
>> lib/Target/R600/SIInstrInfo.td | 91 ++++++++++++++++++++++++++++++++
>> lib/Target/R600/SIInstructions.td | 96 +++++++++++++++++++++++++---------
>> lib/Target/R600/SIIntrinsics.td | 48 +++++++++++++++++
>> 7 files changed, 340 insertions(+), 24 deletions(-)
>>
>> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
>> index 849f169..359161c 100644
>> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
>> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
>> @@ -1542,6 +1542,30 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
>> NODE_NAME_CASE(SAMPLEB)
>> NODE_NAME_CASE(SAMPLED)
>> NODE_NAME_CASE(SAMPLEL)
>> + NODE_NAME_CASE(GATHER4)
>> + NODE_NAME_CASE(GATHER4_CL)
>> + NODE_NAME_CASE(GATHER4_L)
>> + NODE_NAME_CASE(GATHER4_B)
>> + NODE_NAME_CASE(GATHER4_B_CL)
>> + NODE_NAME_CASE(GATHER4_LZ)
>> + NODE_NAME_CASE(GATHER4_C)
>> + NODE_NAME_CASE(GATHER4_C_CL)
>> + NODE_NAME_CASE(GATHER4_C_L)
>> + NODE_NAME_CASE(GATHER4_C_B)
>> + NODE_NAME_CASE(GATHER4_C_B_CL)
>> + NODE_NAME_CASE(GATHER4_C_LZ)
>> + NODE_NAME_CASE(GATHER4_O)
>> + NODE_NAME_CASE(GATHER4_CL_O)
>> + NODE_NAME_CASE(GATHER4_L_O)
>> + NODE_NAME_CASE(GATHER4_B_O)
>> + NODE_NAME_CASE(GATHER4_B_CL_O)
>> + NODE_NAME_CASE(GATHER4_LZ_O)
>> + NODE_NAME_CASE(GATHER4_C_O)
>> + NODE_NAME_CASE(GATHER4_C_CL_O)
>> + NODE_NAME_CASE(GATHER4_C_L_O)
>> + NODE_NAME_CASE(GATHER4_C_B_O)
>> + NODE_NAME_CASE(GATHER4_C_B_CL_O)
>> + NODE_NAME_CASE(GATHER4_C_LZ_O)
>
> You don't need to add new SDNodes for all these instructions; you can just use
> the intrinsic directly in the pattern.
>
> The only reason to add SDNodes is if there are optimizations / special lowering
> we can do for these instructions.
>
>> NODE_NAME_CASE(STORE_MSKOR)
>> NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
>> }
>> diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
>> index d5d821d..a9af195 100644
>> --- a/lib/Target/R600/AMDGPUISelLowering.h
>> +++ b/lib/Target/R600/AMDGPUISelLowering.h
>> @@ -203,6 +203,37 @@ enum {
>> SAMPLEB,
>> SAMPLED,
>> SAMPLEL,
>> +
>> + // Gather4 opcodes
>> + GATHER4,
>> + GATHER4_CL,
>> + GATHER4_L,
>> + GATHER4_B,
>> + GATHER4_B_CL,
>> + GATHER4_LZ,
>> +
>> + GATHER4_C,
>> + GATHER4_C_CL,
>> + GATHER4_C_L,
>> + GATHER4_C_B,
>> + GATHER4_C_B_CL,
>> + GATHER4_C_LZ,
>> +
>> + GATHER4_O,
>> + GATHER4_CL_O,
>> + GATHER4_L_O,
>> + GATHER4_B_O,
>> + GATHER4_B_CL_O,
>> + GATHER4_LZ_O,
>> +
>> + GATHER4_C_O,
>> + GATHER4_C_CL_O,
>> + GATHER4_C_L_O,
>> + GATHER4_C_B_O,
>> + GATHER4_C_B_CL_O,
>> + GATHER4_C_LZ_O,
>> +
>> + // Memory opcodes
>> FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
>> STORE_MSKOR,
>> LOAD_CONSTANT,
>> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
>> index 1a861d4..909255d 100644
>> --- a/lib/Target/R600/SIISelLowering.cpp
>> +++ b/lib/Target/R600/SIISelLowering.cpp
>> @@ -688,6 +688,59 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
>> Op.getOperand(1),
>> Op.getOperand(2),
>> Op.getOperand(3));
>> +
>> + // Gather4 intrinsics
>> + case AMDGPUIntrinsic::SI_gather4:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_cl:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_CL, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_l:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_L, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_b:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_b_cl:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_CL, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_lz:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_LZ, Op, DAG);
>> +
>> + case AMDGPUIntrinsic::SI_gather4_c:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_c_cl:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_CL, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_c_l:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_L, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_c_b:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_c_b_cl:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_CL, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_c_lz:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_LZ, Op, DAG);
>> +
>> + case AMDGPUIntrinsic::SI_gather4_o:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_O, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_cl_o:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_CL_O, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_l_o:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_L_O, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_b_o:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_O, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_b_cl_o:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_CL_O, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_lz_o:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_LZ_O, Op, DAG);
>> +
>> + case AMDGPUIntrinsic::SI_gather4_c_o:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_O, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_c_cl_o:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_CL_O, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_c_l_o:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_L_O, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_c_b_o:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_O, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_c_b_cl_o:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_CL_O, Op, DAG);
>> + case AMDGPUIntrinsic::SI_gather4_c_lz_o:
>> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_LZ_O, Op, DAG);
>> }
>> }
>>
>> @@ -876,6 +929,25 @@ SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
>> Op.getOperand(4));
>> }
>>
>> +SDValue SITargetLowering::LowerSampleRawIntrinsic(unsigned Opcode,
>> + const SDValue &Op,
>> + SelectionDAG &DAG) const {
>> + SDValue Ops[] = {
>> + Op.getOperand(1),
>> + Op.getOperand(2),
>> + Op.getOperand(3),
>> + Op.getOperand(4),
>> + Op.getOperand(5),
>> + Op.getOperand(6),
>> + Op.getOperand(7),
>> + Op.getOperand(8),
>> + Op.getOperand(9),
>> + Op.getOperand(10),
>> + Op.getOperand(11)
>> + };
>> + return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Ops);
>> +}
>> +
>> SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
>> if (Op.getValueType() != MVT::i64)
>> return SDValue();
>> diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
>> index c6eaa81..b48da3b 100644
>> --- a/lib/Target/R600/SIISelLowering.h
>> +++ b/lib/Target/R600/SIISelLowering.h
>> @@ -25,6 +25,8 @@ class SITargetLowering : public AMDGPUTargetLowering {
>> SDValue Chain, unsigned Offset, bool Signed) const;
>> SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
>> SelectionDAG &DAG) const;
>> + SDValue LowerSampleRawIntrinsic(unsigned Opcode, const SDValue &Op,
>> + SelectionDAG &DAG) const;
>> SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
>> SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
>> SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
>> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
>> index 3368d49..23a7ca3 100644
>> --- a/lib/Target/R600/SIInstrInfo.td
>> +++ b/lib/Target/R600/SIInstrInfo.td
>> @@ -57,6 +57,50 @@ def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
>> def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
>> def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
>>
>> +class SDSampleRaw<string opcode> : SDNode <opcode,
>> + SDTypeProfile<1, 11,
>> + [SDTCisVT<0, v4f32>, // vdata(VGPR)
>> + SDTCisVT<2, v32i8>, // rsrc(SGPR)
>> + SDTCisVT<3, v4i32>, // sampler(SGPR)
>> + SDTCisVT<4, i32>, // dmask(imm)
>> + SDTCisVT<5, i32>, // unorm(imm)
>> + SDTCisVT<6, i32>, // r128(imm)
>> + SDTCisVT<7, i32>, // da(imm)
>> + SDTCisVT<8, i32>, // glc(imm)
>> + SDTCisVT<9, i32>, // slc(imm)
>> + SDTCisVT<10, i32>, // tfe(imm)
>> + SDTCisVT<11, i32> // lwe(imm)
>> + ]>
>> +>;
>> +
>> +def SIgather4 : SDSampleRaw<"AMDGPUISD::GATHER4">;
>> +def SIgather4_cl : SDSampleRaw<"AMDGPUISD::GATHER4_CL">;
>> +def SIgather4_l : SDSampleRaw<"AMDGPUISD::GATHER4_L">;
>> +def SIgather4_b : SDSampleRaw<"AMDGPUISD::GATHER4_B">;
>> +def SIgather4_b_cl : SDSampleRaw<"AMDGPUISD::GATHER4_B_CL">;
>> +def SIgather4_lz : SDSampleRaw<"AMDGPUISD::GATHER4_LZ">;
>> +
>> +def SIgather4_c : SDSampleRaw<"AMDGPUISD::GATHER4_C">;
>> +def SIgather4_c_cl : SDSampleRaw<"AMDGPUISD::GATHER4_C_CL">;
>> +def SIgather4_c_l : SDSampleRaw<"AMDGPUISD::GATHER4_C_L">;
>> +def SIgather4_c_b : SDSampleRaw<"AMDGPUISD::GATHER4_C_B">;
>> +def SIgather4_c_b_cl : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_CL">;
>> +def SIgather4_c_lz : SDSampleRaw<"AMDGPUISD::GATHER4_C_LZ">;
>> +
>> +def SIgather4_o : SDSampleRaw<"AMDGPUISD::GATHER4_O">;
>> +def SIgather4_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_CL_O">;
>> +def SIgather4_l_o : SDSampleRaw<"AMDGPUISD::GATHER4_L_O">;
>> +def SIgather4_b_o : SDSampleRaw<"AMDGPUISD::GATHER4_B_O">;
>> +def SIgather4_b_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_B_CL_O">;
>> +def SIgather4_lz_o : SDSampleRaw<"AMDGPUISD::GATHER4_LZ_O">;
>> +
>> +def SIgather4_c_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_O">;
>> +def SIgather4_c_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_CL_O">;
>> +def SIgather4_c_l_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_L_O">;
>> +def SIgather4_c_b_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_O">;
>> +def SIgather4_c_b_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_CL_O">;
>> +def SIgather4_c_lz_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_LZ_O">;
>> +
>> // Transformation function, extract the lower 32bit of a 64bit immediate
>> def LO32 : SDNodeXForm<imm, [{
>> return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32);
>> @@ -658,6 +702,53 @@ multiclass MIMG_Sampler <bits<7> op, string asm> {
>> defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>;
>> }
>>
>> +class MIMG_Gather_Helper <bits<7> op, string asm,
>> + RegisterClass dst_rc,
>> + RegisterClass src_rc> : MIMG <
>> + op,
>> + (outs dst_rc:$vdata),
>> + (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
>> + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
>> + SReg_256:$srsrc, SReg_128:$ssamp),
>> + asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
>> + #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
>> + []> {
>> + let mayLoad = 1;
>> + let mayStore = 0;
>> +
>> + // DMASK was repurposed for GATHER4. 4 components are always
>> + // returned and DMASK works like a swizzle - it selects
>> + // the component to fetch. The only useful DMASK values are
>> + // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
>> + // (red,red,red,red) etc.) The ISA document doesn't mention
>> + // this.
>> + // Therefore, disable all code which updates DMASK by setting these two:
>> + let MIMG = 0;
>> + let hasPostISelHook = 0;
>> +}
>> +
>> +multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
>> + RegisterClass dst_rc,
>> + int channels> {
>> + def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_32>,
>> + MIMG_Mask<asm#"_V1", channels>;
>> + def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64>,
>> + MIMG_Mask<asm#"_V2", channels>;
>> + def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128>,
>> + MIMG_Mask<asm#"_V4", channels>;
>> + def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256>,
>> + MIMG_Mask<asm#"_V8", channels>;
>> + def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512>,
>> + MIMG_Mask<asm#"_V16", channels>;
>> +}
>> +
>> +multiclass MIMG_Gather <bits<7> op, string asm> {
>> + defm _V1 : MIMG_Gather_Src_Helper<op, asm, VReg_32, 1>;
>> + defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2>;
>> + defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3>;
>> + defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4>;
>> +}
>> +
>> //===----------------------------------------------------------------------===//
>> // Vector instruction mappings
>> //===----------------------------------------------------------------------===//
>> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
>> index d4a7c5c..d65d88b 100644
>> --- a/lib/Target/R600/SIInstructions.td
>> +++ b/lib/Target/R600/SIInstructions.td
>> @@ -887,30 +887,30 @@ defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">;
>> //def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
>> //def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
>> //def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
>> -//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>;
>> -//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>;
>> -//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>;
>> -//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>;
>> -//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>;
>> -//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>;
>> -//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>;
>> -//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>;
>> -//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>;
>> -//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>;
>> -//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>;
>> -//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>;
>> -//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>;
>> -//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>;
>> -//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>;
>> -//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>;
>> -//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>;
>> -//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>;
>> -//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>;
>> -//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>;
>> -//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>;
>> -//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>;
>> -//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>;
>> -//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>;
>> +defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "IMAGE_GATHER4">;
>> +defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "IMAGE_GATHER4_CL">;
>> +defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "IMAGE_GATHER4_L">;
>> +defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "IMAGE_GATHER4_B">;
>> +defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "IMAGE_GATHER4_B_CL">;
>> +defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "IMAGE_GATHER4_LZ">;
>> +defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "IMAGE_GATHER4_C">;
>> +defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "IMAGE_GATHER4_C_CL">;
>> +defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "IMAGE_GATHER4_C_L">;
>> +defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "IMAGE_GATHER4_C_B">;
>> +defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "IMAGE_GATHER4_C_B_CL">;
>> +defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "IMAGE_GATHER4_C_LZ">;
>> +defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "IMAGE_GATHER4_O">;
>> +defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "IMAGE_GATHER4_CL_O">;
>> +defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "IMAGE_GATHER4_L_O">;
>> +defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "IMAGE_GATHER4_B_O">;
>> +defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "IMAGE_GATHER4_B_CL_O">;
>> +defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "IMAGE_GATHER4_LZ_O">;
>> +defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "IMAGE_GATHER4_C_O">;
>> +defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "IMAGE_GATHER4_C_CL_O">;
>> +defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "IMAGE_GATHER4_C_L_O">;
>> +defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "IMAGE_GATHER4_C_B_O">;
>> +defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "IMAGE_GATHER4_C_B_CL_O">;
>> +defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "IMAGE_GATHER4_C_LZ_O">;
>> //def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>;
>> //def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
>> //def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
>> @@ -1655,6 +1655,54 @@ def : SextInReg <i16, 16>;
>> /********** Image sampling patterns **********/
>> /********** ======================= **********/
>>
>> +class SampleRawPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
>> + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm,
>> + i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
>> + (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da),
>> + (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc),
>> + $addr, $rsrc, $sampler)
>> +>;
>> +
>> +// Gather4 patterns. Only the variants which make sense are defined.
>> +def : SampleRawPattern<SIgather4, IMAGE_GATHER4_V4_V2, v2i32>;
>> +def : SampleRawPattern<SIgather4, IMAGE_GATHER4_V4_V4, v4i32>;
>> +def : SampleRawPattern<SIgather4_cl, IMAGE_GATHER4_CL_V4_V4, v4i32>;
>> +def : SampleRawPattern<SIgather4_l, IMAGE_GATHER4_L_V4_V4, v4i32>;
>> +def : SampleRawPattern<SIgather4_b, IMAGE_GATHER4_B_V4_V4, v4i32>;
>> +def : SampleRawPattern<SIgather4_b_cl, IMAGE_GATHER4_B_CL_V4_V4, v4i32>;
>> +def : SampleRawPattern<SIgather4_b_cl, IMAGE_GATHER4_B_CL_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_lz, IMAGE_GATHER4_LZ_V4_V2, v2i32>;
>> +def : SampleRawPattern<SIgather4_lz, IMAGE_GATHER4_LZ_V4_V4, v4i32>;
>> +
>> +def : SampleRawPattern<SIgather4_c, IMAGE_GATHER4_C_V4_V4, v4i32>;
>> +def : SampleRawPattern<SIgather4_c_cl, IMAGE_GATHER4_C_CL_V4_V4, v4i32>;
>> +def : SampleRawPattern<SIgather4_c_cl, IMAGE_GATHER4_C_CL_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_c_l, IMAGE_GATHER4_C_L_V4_V4, v4i32>;
>> +def : SampleRawPattern<SIgather4_c_l, IMAGE_GATHER4_C_L_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_c_b, IMAGE_GATHER4_C_B_V4_V4, v4i32>;
>> +def : SampleRawPattern<SIgather4_c_b, IMAGE_GATHER4_C_B_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_c_b_cl, IMAGE_GATHER4_C_B_CL_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_c_lz, IMAGE_GATHER4_C_LZ_V4_V4, v4i32>;
>> +
>> +def : SampleRawPattern<SIgather4_o, IMAGE_GATHER4_O_V4_V4, v4i32>;
>> +def : SampleRawPattern<SIgather4_cl_o, IMAGE_GATHER4_CL_O_V4_V4, v4i32>;
>> +def : SampleRawPattern<SIgather4_cl_o, IMAGE_GATHER4_CL_O_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_l_o, IMAGE_GATHER4_L_O_V4_V4, v4i32>;
>> +def : SampleRawPattern<SIgather4_l_o, IMAGE_GATHER4_L_O_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_b_o, IMAGE_GATHER4_B_O_V4_V4, v4i32>;
>> +def : SampleRawPattern<SIgather4_b_o, IMAGE_GATHER4_B_O_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_b_cl_o, IMAGE_GATHER4_B_CL_O_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_lz_o, IMAGE_GATHER4_LZ_O_V4_V4, v4i32>;
>> +
>> +def : SampleRawPattern<SIgather4_c_o, IMAGE_GATHER4_C_O_V4_V4, v4i32>;
>> +def : SampleRawPattern<SIgather4_c_o, IMAGE_GATHER4_C_O_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_c_cl_o, IMAGE_GATHER4_C_CL_O_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_c_l_o, IMAGE_GATHER4_C_L_O_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_c_b_o, IMAGE_GATHER4_C_B_O_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_c_b_cl_o, IMAGE_GATHER4_C_B_CL_O_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V4, v4i32>;
>> +def : SampleRawPattern<SIgather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V8, v8i32>;
>> +
>> /* SIsample for simple 1D texture lookup */
>> def : Pat <
>> (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
>> diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
>> index 00e32c0..9d85f17 100644
>> --- a/lib/Target/R600/SIIntrinsics.td
>> +++ b/lib/Target/R600/SIIntrinsics.td
>> @@ -56,11 +56,59 @@ let TargetPrefix = "SI", isTarget = 1 in {
>>
>> class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
>>
>> + // Fully-flexible SAMPLE instruction.
>> + class SampleRaw : Intrinsic <
>> + [llvm_v4f32_ty], // vdata(VGPR)
>> + [llvm_anyvector_ty, // vaddr(VGPR)
>> + llvm_v32i8_ty, // rsrc(SGPR)
>> + llvm_v16i8_ty, // sampler(SGPR)
>> + llvm_i32_ty, // dmask(imm)
>> + llvm_i32_ty, // unorm(imm)
>> + llvm_i32_ty, // r128(imm)
>> + llvm_i32_ty, // da(imm)
>> + llvm_i32_ty, // glc(imm)
>> + llvm_i32_ty, // slc(imm)
>> + llvm_i32_ty, // tfe(imm)
>> + llvm_i32_ty], // lwe(imm)
>> + [IntrNoMem]>;
>> +
>> def int_SI_sample : Sample;
>> def int_SI_sampleb : Sample;
>> def int_SI_sampled : Sample;
>> def int_SI_samplel : Sample;
>>
>> + // Basic gather4
>> + def int_SI_gather4 : SampleRaw;
>> + def int_SI_gather4_cl : SampleRaw;
>> + def int_SI_gather4_l : SampleRaw;
>> + def int_SI_gather4_b : SampleRaw;
>> + def int_SI_gather4_b_cl : SampleRaw;
>> + def int_SI_gather4_lz : SampleRaw;
>> +
>> + // Gather4 with comparison
>> + def int_SI_gather4_c : SampleRaw;
>> + def int_SI_gather4_c_cl : SampleRaw;
>> + def int_SI_gather4_c_l : SampleRaw;
>> + def int_SI_gather4_c_b : SampleRaw;
>> + def int_SI_gather4_c_b_cl : SampleRaw;
>> + def int_SI_gather4_c_lz : SampleRaw;
>> +
>> + // Gather4 with offsets
>> + def int_SI_gather4_o : SampleRaw;
>> + def int_SI_gather4_cl_o : SampleRaw;
>> + def int_SI_gather4_l_o : SampleRaw;
>> + def int_SI_gather4_b_o : SampleRaw;
>> + def int_SI_gather4_b_cl_o : SampleRaw;
>> + def int_SI_gather4_lz_o : SampleRaw;
>> +
>> + // Gather4 with comparison and offsets
>> + def int_SI_gather4_c_o : SampleRaw;
>> + def int_SI_gather4_c_cl_o : SampleRaw;
>> + def int_SI_gather4_c_l_o : SampleRaw;
>> + def int_SI_gather4_c_b_o : SampleRaw;
>> + def int_SI_gather4_c_b_cl_o : SampleRaw;
>> + def int_SI_gather4_c_lz_o : SampleRaw;
>> +
>> def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
>>
>> def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
>> --
>> 1.9.1
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list