[Mesa-dev] [PATCH 2/2] radeon/llvm: add store.vertex.{position, param} intrinsics

Sun Dec 16 12:01:16 PST 2012

---
 lib/Target/AMDGPU/R600ISelLowering.cpp        | 51 +++++++++++++++++++++++----
 lib/Target/AMDGPU/R600Instructions.td         |  5 +++
 lib/Target/AMDGPU/R600Intrinsics.td           |  4 +++
 lib/Target/AMDGPU/R600MachineFunctionInfo.cpp |  1 +
 lib/Target/AMDGPU/R600MachineFunctionInfo.h   |  1 +
 5 files changed, 56 insertions(+), 6 deletions(-)

diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 6c594cc..5b9545e 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -350,8 +350,27 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
   }
   case AMDGPU::EG_ExportSwz:
   case AMDGPU::R600_ExportSwz: {
+    // Instruction is left unmodified if it's not the last one of its type
+    bool isLastInstructionOfItsType;
+    {
+      isLastInstructionOfItsType = true;
+      unsigned InstExportType = MI->getOperand(1).getImm();
+      for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
+          EndBlock = BB->end(); NextExportInst != EndBlock;
+          NextExportInst = llvm::next(NextExportInst)) {
+        if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
+            NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
+          unsigned CurrentInstExportType = NextExportInst->getOperand(1)
+              .getImm();
+          if (CurrentInstExportType == InstExportType) {
+            isLastInstructionOfItsType = false;
+            break;
+          }
+        }
+      }
+    }
     bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
-    if (!EOP)
+    if (!EOP && !isLastInstructionOfItsType)
       return BB;
     unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
@@ -363,7 +382,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
             .addOperand(MI->getOperand(5))
             .addOperand(MI->getOperand(6))
             .addImm(CfInst)
-            .addImm(1);
+            .addImm(EOP);
     break;
   }
   }
@@ -382,7 +401,7 @@ using namespace llvm::AMDGPUIntrinsic;
 static SDValue
 InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
     unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
-    SDValue Scalar, SDValue Chain) {
+    unsigned ArrayBaseOffset, SDValue Scalar, SDValue Chain) {
   if (!ExportMap[Slot]) {
     SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
       DL, MVT::v4f32,
@@ -420,7 +439,7 @@ InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
   const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
       DAG.getConstant(Inst, MVT::i32),
       DAG.getConstant(Type, MVT::i32),
-      DAG.getConstant(Slot, MVT::i32),
+      DAG.getConstant(Slot + ArrayBaseOffset, MVT::i32),
       DAG.getConstant(Mask, MVT::i32)};
 
   DAG.UpdateNodeOperands(ExportInstruction,
@@ -464,7 +483,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
 
       SDNode **OutputsMap = MFI->Outputs;
       return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
-          RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
+          RegIndex / 4, RegIndex % 4, 0, 0, 0, Op.getOperand(2),
           Chain);
 
     }
@@ -498,7 +517,27 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
       }
 
       return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
-          RegIndex / 4, RegIndex % 4, Inst, 0, Op.getOperand(2),
+          RegIndex / 4, RegIndex % 4, Inst, 0, 0, Op.getOperand(2),
+          Chain);
+    }
+    case AMDGPUIntrinsic::R600_store_vertex_position: {
+      MachineFunction &MF = DAG.getMachineFunction();
+      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+
+      SDNode **OutputsMap = MFI->Outputs;
+      return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
+          RegIndex / 4, RegIndex % 4, 0, 1, 60, Op.getOperand(2),
+          Chain);
+    }
+    case AMDGPUIntrinsic::R600_store_vertex_param: {
+      MachineFunction &MF = DAG.getMachineFunction();
+      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+
+      SDNode **OutputsMap = MFI->VertexParamOutputs;
+      return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
+          RegIndex / 4, RegIndex % 4, 0, 2, 0, Op.getOperand(2),
           Chain);
     }
     // default for switch(IntrinsicID)
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index c3ffe97..3834df5 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -545,6 +545,11 @@ multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
         (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
         0, 1, 2, 3, cf_inst, 0)
   >;
+  def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1),
+    (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)),
+        (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+        0, 1, 2, 3, cf_inst, 0)
+  >;
 }
 
 multiclass SteamOutputExportPattern<Instruction ExportInst,
diff --git a/lib/Target/AMDGPU/R600Intrinsics.td b/lib/Target/AMDGPU/R600Intrinsics.td
index 0186f9d..069fa07 100644
--- a/lib/Target/AMDGPU/R600Intrinsics.td
+++ b/lib/Target/AMDGPU/R600Intrinsics.td
@@ -21,6 +21,10 @@ let TargetPrefix = "R600", isTarget = 1 in {
     Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
   def int_R600_clipvertex :
     Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_R600_store_vertex_position :
+    Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
+  def int_R600_store_vertex_param :
+    Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
   def int_R600_store_stream_output :
     Intrinsic<[], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], []>;
   def int_R600_store_pixel_color :
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
index 93b4608..ecc98ec 100644
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
@@ -19,6 +19,7 @@ R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
     HasPerspectiveInterpolation(false) {
     memset(Outputs, 0, sizeof(Outputs));
     memset(StreamOutputs, 0, sizeof(StreamOutputs));
+    memset(VertexParamOutputs, 0, sizeof(VertexParamOutputs));
   }
 
 unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const {
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
index 6cc875f..f7cede3 100644
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
@@ -26,6 +26,7 @@ public:
   R600MachineFunctionInfo(const MachineFunction &MF);
   std::vector<unsigned> ReservedRegs;
   SDNode *Outputs[16];
+  SDNode *VertexParamOutputs[16];
   SDNode *StreamOutputs[64][4];
   BitVector IndirectChannels;
   bool HasLinearInterpolation;
-- 
1.8.0.2