[Mesa-dev] [PATCH 1/2] radeon/llvm: use specialised R600.store.pixel.* for fragment shader

Tom Stellard tom at stellard.net
Mon Oct 1 07:36:37 PDT 2012


On Sat, Sep 29, 2012 at 05:19:21PM +0200, Vincent Lejeune wrote:
> ---
>  src/gallium/drivers/radeon/AMDGPUISelLowering.cpp  |  1 +
>  src/gallium/drivers/radeon/AMDGPUISelLowering.h    |  1 +
>  .../radeon/MCTargetDesc/R600MCCodeEmitter.cpp      | 21 ++++++-
>  src/gallium/drivers/radeon/R600ISelLowering.cpp    | 66 ++++++++++++++++++++++
>  src/gallium/drivers/radeon/R600Instructions.td     | 57 +++++++++++++++++++
>  .../drivers/radeon/R600IntrinsicsNoOpenCL.td       |  6 ++
>  src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td |  6 ++
>  .../drivers/radeon/R600MachineFunctionInfo.cpp     |  4 +-
>  .../drivers/radeon/R600MachineFunctionInfo.h       |  2 +
>  9 files changed, 162 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
> index 04dadc3..5cb4d87 100644
> --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
> +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
> @@ -348,5 +348,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
>    NODE_NAME_CASE(URECIP)
>    NODE_NAME_CASE(INTERP)
>    NODE_NAME_CASE(INTERP_P0)
> +  NODE_NAME_CASE(EXPORT)
>    }
>  }
> diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h
> index 2d8ed82..58d2287 100644
> --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h
> +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h
> @@ -121,6 +121,7 @@ enum
>    URECIP,
>    INTERP,
>    INTERP_P0,
> +  EXPORT,
>    LAST_AMDGPU_ISD_NUMBER
>  };
>  
> diff --git a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp b/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp
> index a11f482..623676c 100644
> --- a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp
> +++ b/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp
> @@ -108,7 +108,8 @@ enum InstrTypes {
>    INSTR_TEX,
>    INSTR_FC,
>    INSTR_NATIVE,
> -  INSTR_VTX
> +  INSTR_VTX,
> +  INSTR_EXPORT
>  };
>  
>  enum FCInstr {
> @@ -183,6 +184,24 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
>          Emit(InstWord2, OS);
>          break;
>        }
> +    case AMDGPU::R600_Export:
> +    case AMDGPU::R600_Export_DONE:
> +      {
> +        uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
> +        EmitByte(INSTR_EXPORT, OS);
> +        const MCOperand &MO = MI.getOperand(0);
> +        EmitByte(getHWReg(MO.getReg()), OS);
> +        if (MI.getOpcode() == AMDGPU::R600_Export)
> +          EmitByte(0x27, OS);
> +        else
> +          EmitByte(0x28, OS);
> +        EmitByte(MI.getOperand(2).getImm(), OS);
> +        EmitByte(MI.getOperand(3).getImm(), OS);
> +        EmitByte(MI.getOperand(4).getImm(), OS);
> +        EmitByte(MI.getOperand(5).getImm(), OS);
> +        EmitByte(MI.getOperand(6).getImm(), OS);
> +        break;
> +      }

Going forward, I would really like to have LLVM use the real encoding
for instructions rather than using the bytestream.  Could you change
the encoding to be more like what we do for VTX instructions?

>  
>      default:
>        EmitALUInstr(MI, Fixups, OS);
> diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp
> index 5dd2f53..cda7a38 100644
> --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
> +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
> @@ -264,6 +264,24 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
>  
>        return BB;
>      }
> +  case AMDGPU::R600_Export:
> +    {
> +      MachineBasicBlock::iterator SecondI = I;
> +      SecondI++;
> +      SecondI++;
> +      if (SecondI != BB->end())
> +        return BB;

A RAT_WRITE_* instruction checks if the next instruction is AMDIL::RETURN
to determine whether or it is the last instruction in the program.
I know this isn't really ideal, but could you do the same here, just to
be consistent.

> +      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::R600_Export_DONE))
> +            .addOperand(MI->getOperand(0))
> +            .addOperand(MI->getOperand(1))
> +            .addOperand(MI->getOperand(2))
> +            .addOperand(MI->getOperand(3))
> +            .addOperand(MI->getOperand(4))
> +            .addOperand(MI->getOperand(5))
> +            .addOperand(MI->getOperand(6));
> +      break;
> +    }
> +
>    }
>  
>    MI->eraseFromParent();
> @@ -300,6 +318,54 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
>        }
>        return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
>      }
> +    case AMDGPUIntrinsic::R600_store_pixel_color: {
> +      MachineFunction &MF = DAG.getMachineFunction();
> +      R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>();
> +      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
> +      unsigned Slot = RegIndex / 4;
> +
> +      void ** outputsMap = MFI->outputs;

I think think this can be SDNode** see comment below.

> +
> +      if (!outputsMap[Slot]) {
> +        SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
> +          Op.getDebugLoc(), MVT::v4f32,
> +          DAG.getUNDEF(MVT::v4f32),
> +          Op.getOperand(2),
> +          DAG.getConstant(RegIndex % 4, MVT::i32));
> +
> +        const SDValue Ops[8] = {Chain, Vector, DAG.getConstant(0, MVT::i32),
> +            DAG.getConstant(Slot, MVT::i32), DAG.getConstant(0, MVT::i32),
> +            DAG.getConstant(1, MVT::i32), DAG.getConstant(2, MVT::i32),
> +            DAG.getConstant(3, MVT::i32)};
> +
> +        SDValue Res =  DAG.getNode(
> +            AMDGPUISD::EXPORT,
> +            Op.getDebugLoc(),
> +            MVT::Other,
> +            Ops, 8);
> +         outputsMap[Slot] = Res.getNode();
> +         return Res;
> +      }
> +
> +      SDNode *ExportInstruction = (SDNode *) outputsMap[Slot] ;
> +      SDValue PreviousVector = ExportInstruction->getOperand(1);
> +      SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
> +          Op.getDebugLoc(), MVT::v4f32,
> +          PreviousVector,
> +          Op.getOperand(2),
> +          DAG.getConstant(RegIndex % 4, MVT::i32));
> +
> +      const SDValue Ops[8] = {ExportInstruction->getOperand(0), Vector, DAG.getConstant(0, MVT::i32),
> +          DAG.getConstant(Slot, MVT::i32), DAG.getConstant(0, MVT::i32),
> +          DAG.getConstant(1, MVT::i32), DAG.getConstant(2, MVT::i32),
> +          DAG.getConstant(3, MVT::i32)};
> +
> +      DAG.UpdateNodeOperands(ExportInstruction,
> +          Ops, 8);
> +
> +      return Chain;
> +    }
> +
>      // default for switch(IntrinsicID)
>      default: break;
>      }
> diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
> index 1689a2f..719060c 100644
> --- a/src/gallium/drivers/radeon/R600Instructions.td
> +++ b/src/gallium/drivers/radeon/R600Instructions.td
> @@ -74,7 +74,64 @@ class R600_ALU {
>  
>  def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
>                                       (ops PRED_SEL_OFF)>;
> +                                     
>  
> +def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
> +
> +def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
> +  [SDNPHasChain, SDNPSideEffect]>;
> +
> +let isTerminator = 1 in {
> +
> +let usesCustomInserter = 1 in {
> +
> +def R600_Export :
> +  InstR600 <0,
> +      (outs),
> +      (ins R600_Reg128:$src, i32imm:$type, i32imm:$arraybase,
> +      i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w),
> +      !strconcat("EXPORT", " $src"),
> +      [(EXPORT (v4f32 R600_Reg128:$src),
> +      (i32 imm:$type), (i32 imm:$arraybase),
> +      (i32 imm:$sw_x), (i32 imm:$sw_y), (i32 imm:$sw_z), (i32 imm:$sw_w))],
> +      AnyALU>{
> +  bits<7> dst;
> +  bits<9> src;
> +  let Inst{8-0}   = src;
> +  let Inst{49-39} = 0x27;
> +  let Inst{59-53} = dst;
> +}
> +
> +} // End usesCustomInserter = 1
> +
> +def R600_Export_DONE :
> +  InstR600 <0,
> +      (outs),
> +      (ins R600_Reg128:$src, i32imm:$type, i32imm:$arraybase,
> +      i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w),
> +      !strconcat("EXPORT_DONE", " $src"),
> +      [],
> +      AnyALU>{
> +  bits<7> dst;
> +  bits<9> src;
> +  let Inst{8-0}   = src;
> +  let Inst{49-39} = 0x27;
> +  let Inst{59-53} = dst;
> +}
> +
> +}// End isTerminator = 1
> +
> +def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
> +  (R600_Export
> +      (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
> +      0, 61, 0, 7, 7, 7)
> +>;
> +
> +def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
> +  (R600_Export
> +      (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
> +      0, 61, 7, 0, 7, 7)
> +>;
>  
>  class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
>                  InstrItinClass itin = AnyALU> :
> diff --git a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td
> index 3b62f0a..e6241d2 100644
> --- a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td
> +++ b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td
> @@ -23,6 +23,12 @@ let TargetPrefix = "R600", isTarget = 1 in {
>      Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
>    def int_R600_load_input_face : 
>      Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
> +  def int_R600_store_pixel_color :
> +      Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
> +  def int_R600_store_pixel_depth :
> +      Intrinsic<[], [llvm_float_ty], []>;
> +    def int_R600_store_pixel_stencil :
> +      Intrinsic<[], [llvm_float_ty], []>;
>  }
>  
>  let TargetPrefix = "r600", isTarget = 1 in {
> diff --git a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td
> index 00877ca..664d76b 100644
> --- a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td
> +++ b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td
> @@ -23,4 +23,10 @@ let TargetPrefix = "R600", isTarget = 1 in {
>      Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
>    def int_R600_load_input_face : 
>      Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
> +  def int_R600_store_pixel_color :
> +      Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
> +  def int_R600_store_pixel_depth :
> +      Intrinsic<[], [llvm_float_ty], []>;
> +    def int_R600_store_pixel_stencil :
> +      Intrinsic<[], [llvm_float_ty], []>;
>  }
> diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
> index a31848e..1314468 100644
> --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
> +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
> @@ -15,7 +15,9 @@ R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
>    : MachineFunctionInfo(),
>      HasLinearInterpolation(false),
>      HasPerspectiveInterpolation(false)
> -  { }
> +  {
> +    memset(outputs, 0, sizeof(outputs));
> +  }
>  
>  unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const
>  {
> diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
> index 68211b2..40db4a4 100644
> --- a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
> +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
> @@ -16,6 +16,7 @@
>  #define R600MACHINEFUNCTIONINFO_H
>  
>  #include "llvm/CodeGen/MachineFunction.h"
> +#include "llvm/ADT/DenseMap.h"

It doesn't look like this include is necessary.

>  #include <vector>
>  
>  namespace llvm {
> @@ -25,6 +26,7 @@ class R600MachineFunctionInfo : public MachineFunctionInfo {
>  public:
>    R600MachineFunctionInfo(const MachineFunction &MF);
>    std::vector<unsigned> ReservedRegs;
> +  void * outputs[16];

I think you can use SDNode* as the type here.

>    bool HasLinearInterpolation;
>    bool HasPerspectiveInterpolation;
>    
> -- 
> 1.7.11.4
>

-Tom
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list