[Mesa-dev] [PATCH] radeon/llvm: use specialised R600.store.pixel.* for fragment shader

Mon Oct 29 05:35:19 PDT 2012

On Sun, Oct 28, 2012 at 07:18:57PM +0100, Vincent Lejeune wrote:
> ---
>  lib/Target/AMDGPU/AMDGPUISelLowering.cpp           |   1 +
>  lib/Target/AMDGPU/AMDGPUISelLowering.h             |   1 +
>  .../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp      |  11 +-
>  lib/Target/AMDGPU/R600ISelLowering.cpp             |  66 +++++++++++
>  lib/Target/AMDGPU/R600Instructions.td              | 121 +++++++++++++++++++++
>  lib/Target/AMDGPU/R600Intrinsics.td                |   8 ++
>  lib/Target/AMDGPU/R600MachineFunctionInfo.cpp      |   4 +-
>  lib/Target/AMDGPU/R600MachineFunctionInfo.h        |   2 +
>  8 files changed, 212 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
> index 8021fc4..57dcaac 100644
> --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
> +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
> @@ -346,5 +346,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
>    NODE_NAME_CASE(URECIP)
>    NODE_NAME_CASE(INTERP)
>    NODE_NAME_CASE(INTERP_P0)
> +  NODE_NAME_CASE(EXPORT)
>    }
>  }
> diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
> index 2d8ed82..58d2287 100644
> --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
> +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
> @@ -121,6 +121,7 @@ enum
>    URECIP,
>    INTERP,
>    INTERP_P0,
> +  EXPORT,
>    LAST_AMDGPU_ISD_NUMBER
>  };
>  
> diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
> index 35aa832..5178157 100644
> --- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
> +++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
> @@ -101,7 +101,8 @@ enum InstrTypes {
>    INSTR_TEX,
>    INSTR_FC,
>    INSTR_NATIVE,
> -  INSTR_VTX
> +  INSTR_VTX,
> +  INSTR_EXPORT
>  };
>  
>  enum FCInstr {
> @@ -177,6 +178,14 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
>          Emit(InstWord2, OS);
>          break;
>        }
> +    case AMDGPU::EG_Export:
> +    case AMDGPU::R600_Export:
> +      {
> +        uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
> +        EmitByte(INSTR_EXPORT, OS);
> +        Emit(Inst, OS);
> +        break;
> +      }
>  
>      default:
>        EmitALUInstr(MI, Fixups, OS);
> diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
> index dd13c0c..b25c90f 100644
> --- a/lib/Target/AMDGPU/R600ISelLowering.cpp
> +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
> @@ -305,6 +305,25 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
>  
>        return BB;
>      }
> +  case AMDGPU::EG_Export:
> +  case AMDGPU::R600_Export:
> +    {
> +      bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
> +      if (!EOP)
> +        return BB;
> +      unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_Export)? 84 : 40;
> +      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
> +            .addOperand(MI->getOperand(0))
> +            .addOperand(MI->getOperand(1))
> +            .addOperand(MI->getOperand(2))
> +            .addOperand(MI->getOperand(3))
> +            .addOperand(MI->getOperand(4))
> +            .addOperand(MI->getOperand(5))
> +            .addOperand(MI->getOperand(6))
> +            .addImm(CfInst)
> +            .addImm(1);
> +      break;
> +    }
>    }
>  
>    MI->eraseFromParent();
> @@ -343,6 +362,53 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
>        }
>        return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
>      }
> +    case AMDGPUIntrinsic::R600_store_pixel_color: {
> +      MachineFunction &MF = DAG.getMachineFunction();
> +      R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>();

Coding style: the '*' should be attached to the variable name, with no
space after it:
R600MachineFunctionInfo *MFI

> +      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
> +      unsigned Slot = RegIndex / 4;
> +
> +      SDNode **OutputsMap = MFI->Outputs;
> +
> +      if (!OutputsMap[Slot]) {
> +        SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
> +          Op.getDebugLoc(), MVT::v4f32,
> +          DAG.getUNDEF(MVT::v4f32),
> +          Op.getOperand(2),
> +          DAG.getConstant(RegIndex % 4, MVT::i32));
> +
> +        const SDValue Ops[8] = {Chain, Vector, DAG.getConstant(0, MVT::i32),
> +            DAG.getConstant(Slot, MVT::i32), DAG.getConstant(0, MVT::i32),
> +            DAG.getConstant(1, MVT::i32), DAG.getConstant(2, MVT::i32),
> +            DAG.getConstant(3, MVT::i32)};
> +
> +        SDValue Res =  DAG.getNode(
> +            AMDGPUISD::EXPORT,
> +            Op.getDebugLoc(),
> +            MVT::Other,
> +            Ops, 8);
> +         OutputsMap[Slot] = Res.getNode();
> +         return Res;
> +      }
> +
> +      SDNode *ExportInstruction = (SDNode *) OutputsMap[Slot] ;
> +      SDValue PreviousVector = ExportInstruction->getOperand(1);
> +      SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
> +          Op.getDebugLoc(), MVT::v4f32,
> +          PreviousVector,
> +          Op.getOperand(2),
> +          DAG.getConstant(RegIndex % 4, MVT::i32));
> +
> +      const SDValue Ops[8] = {ExportInstruction->getOperand(0), Vector, DAG.getConstant(0, MVT::i32),
> +          DAG.getConstant(Slot, MVT::i32), DAG.getConstant(0, MVT::i32),
> +          DAG.getConstant(1, MVT::i32), DAG.getConstant(2, MVT::i32),
> +          DAG.getConstant(3, MVT::i32)};
> +
> +      DAG.UpdateNodeOperands(ExportInstruction,
> +          Ops, 8);
> +
> +      return Chain;
> +    }
>      // default for switch(IntrinsicID)
>      default: break;
>      }
> diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
> index 7f4c6a1..6138314 100644
> --- a/lib/Target/AMDGPU/R600Instructions.td
> +++ b/lib/Target/AMDGPU/R600Instructions.td
> @@ -456,6 +456,41 @@ def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []>
>  
>  def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
>  
> +//===----------------------------------------------------------------------===//
> +// Export Instructions
> +//===----------------------------------------------------------------------===//
> +
> +def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
> +
> +def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
> +  [SDNPHasChain, SDNPSideEffect]>;
> +
> +multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
> +  def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
> +    (ExportInst
> +        (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
> +        0, 61, 0, 7, 7, 7, cf_inst, 0)
> +  >;
> +
> +  def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
> +    (ExportInst
> +        (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
> +        0, 61, 7, 0, 7, 7, cf_inst, 0)
> +  >;
> +
> +  def : Pat<(int_R600_store_pixel_dummy),
> +    (ExportInst
> +        (v4f32 (IMPLICIT_DEF)), 0, 0, 7, 7, 7, 7, cf_inst, 0)
> +  >;
> +
> +  def : Pat<(EXPORT (v4f32 R600_Reg128:$src),
> +    (i32 imm:$type), (i32 imm:$arraybase),
> +    (i32 imm:$sw_x), (i32 imm:$sw_y), (i32 imm:$sw_z), (i32 imm:$sw_w)),
> +        (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
> +        imm:$sw_x, imm:$sw_y, imm:$sw_z, imm:$sw_w, cf_inst, 0)
> +  >;
> +}
> +
>  let Predicates = [isR600toCayman] in { 
>  
>  //===----------------------------------------------------------------------===//
> @@ -894,6 +929,48 @@ let Predicates = [isR600] in {
>    defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
>    def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
>  
> +  let isTerminator = 1, usesCustomInserter = 1 in {
> +
> +  def R600_Export :
> +  InstR600ISA<
> +        (outs),
> +        (ins R600_Reg128:$src, i32imm:$type, i32imm:$arraybase,
> +        i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst,
> +        i32imm:$eop),
> +        !strconcat("EXPORT", " $src"),
> +        []>{
> +    bits<13> arraybase;
> +    bits<2> type;
> +    bits<7> src;
> +
> +    bits<3> sw_x;
> +    bits<3> sw_y;
> +    bits<3> sw_z;
> +    bits<3> sw_w;
> +
> +    bits<1> eop;
> +    bits<8> inst;
> +
> +    let Inst{12-0} = arraybase;
> +    let Inst{14-13}   = type;
> +    let Inst{21-15} = src;
> +    let Inst{22} = 0; // RW_REL
> +    let Inst{29-23} = 0; // INDEX_GPR
> +    let Inst{31-30} = 3; // ELEM_SIZE
> +    let Inst{34-32} = sw_x;
> +    let Inst{37-35} = sw_y;
> +    let Inst{40-38} = sw_z;
> +    let Inst{43-41} = sw_w;
> +    let Inst{52-49} = 1; // BURST_COUNT
> +    let Inst{53} = 1; // VALID_PIXEL_MODE
> +    let Inst{54} = eop;
> +    let Inst{62-55} = inst;
> +    let Inst{63} = 1; // BARRIER
> +  }
> +
> +  } // End isTerminator = 1, usesCustomInserter = 1
> +
> +  defm : ExportPattern<R600_Export, 39>;
>  }
>  
>  // Helper pattern for normalizing inputs to triginomic instructions for R700+
> @@ -1024,6 +1101,50 @@ let Predicates = [isEGorCayman] in {
>    def : Pat<(fp_to_uint R600_Reg32:$src0),
>      (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>;
>  
> +  let isTerminator = 1, usesCustomInserter = 1 in {
> +
> +  def EG_Export :
> +  InstR600ISA<
> +        (outs),
> +        (ins R600_Reg128:$src, i32imm:$type, i32imm:$arraybase,
> +        i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst,
> +        i32imm:$eop),
> +        !strconcat("EXPORT", " $src"),
> +        []>{
> +    bits<13> arraybase;
> +    bits<2> type;
> +    bits<7> src;
> +
> +    bits<3> sw_x;
> +    bits<3> sw_y;
> +    bits<3> sw_z;
> +    bits<3> sw_w;
> +
> +    bits<1> eop;
> +    bits<8> inst;
> +
> +    let Inst{12-0} = arraybase;
> +    let Inst{14-13}   = type;
> +    let Inst{21-15} = src;
> +    let Inst{22} = 0; // RW_REL
> +    let Inst{29-23} = 0; // INDEX_GPR
> +    let Inst{31-30} = 3; // ELEM_SIZE
> +    let Inst{34-32} = sw_x;
> +    let Inst{37-35} = sw_y;
> +    let Inst{40-38} = sw_z;
> +    let Inst{43-41} = sw_w;
> +    let Inst{51-48} = 1; // BURST_COUNT
> +    let Inst{52} = 1; // VALID_PIXEL_MODE
> +    let Inst{53} = eop;
> +    let Inst{61-54} = inst;
> +    let Inst{62} = 0; // MARK
> +    let Inst{63} = 1; // BARRIER
> +  }
> +

It looks like R600_Export and EG_Export are nearly identical.  Could you
refactor this instruction definition into a class, so that the common parts
can be shared with R600_Export?  You can use R600ALU_Word0, R600ALU_Word1,
R600ALU_Word1_OP2, and R600_1OP as examples of how to use inheritance
for the instruction encoding.

> +  } // End isTerminator = 1, usesCustomInserter = 1
> +
> +  defm : ExportPattern<EG_Export, 83>;
> +
>  //===----------------------------------------------------------------------===//
>  // Memory read/write instructions
>  //===----------------------------------------------------------------------===//
> diff --git a/lib/Target/AMDGPU/R600Intrinsics.td b/lib/Target/AMDGPU/R600Intrinsics.td
> index d661366..3a7e3ee 100644
> --- a/lib/Target/AMDGPU/R600Intrinsics.td
> +++ b/lib/Target/AMDGPU/R600Intrinsics.td
> @@ -19,4 +19,12 @@ let TargetPrefix = "R600", isTarget = 1 in {
>      Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
>    def int_R600_load_input_linear :
>      Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
> +  def int_R600_store_pixel_color :
> +      Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
> +  def int_R600_store_pixel_depth :
> +      Intrinsic<[], [llvm_float_ty], []>;
> +  def int_R600_store_pixel_stencil :
> +      Intrinsic<[], [llvm_float_ty], []>;
> +  def int_R600_store_pixel_dummy :
> +      Intrinsic<[], [], []>;
>  }
> diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
> index a31848e..49e662f 100644
> --- a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
> +++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
> @@ -15,7 +15,9 @@ R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
>    : MachineFunctionInfo(),
>      HasLinearInterpolation(false),
>      HasPerspectiveInterpolation(false)
> -  { }
> +  {
> +    memset(Outputs, 0, sizeof(Outputs));
> +  }
>  
>  unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const
>  {
> diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
> index 4444546..9f01379 100644
> --- a/lib/Target/AMDGPU/R600MachineFunctionInfo.h
> +++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
> @@ -16,6 +16,7 @@
>  #define R600MACHINEFUNCTIONINFO_H
>  
>  #include "llvm/CodeGen/MachineFunction.h"
> +#include "llvm/CodeGen/SelectionDAG.h"
>  #include <vector>
>  
>  namespace llvm {
> @@ -25,6 +26,7 @@ class R600MachineFunctionInfo : public MachineFunctionInfo {
>  public:
>    R600MachineFunctionInfo(const MachineFunction &MF);
>    std::vector<unsigned> ReservedRegs;
> +  SDNode *Outputs[16];
>    bool HasLinearInterpolation;
>    bool HasPerspectiveInterpolation;
>  
> -- 
> 1.7.11.7
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev