[Mesa-dev] [PATCH 1/2] radeon/llvm: add an intrinsic converting clipvertex to clipdistance

Vincent Lejeune vljn at ovi.com
Sun Dec 16 12:01:15 PST 2012


---
 lib/Target/AMDGPU/R600ISelLowering.cpp | 21 ++++++++++++++++++++-
 lib/Target/AMDGPU/R600Instructions.td  | 12 ++++++++++++
 lib/Target/AMDGPU/R600Intrinsics.td    |  2 ++
 lib/Target/AMDGPU/R600RegisterInfo.td  | 21 ++++++++++++++++++++-
 4 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 3a4283c..6c594cc 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -328,7 +328,26 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
 
     return BB;
   }
-
+  case AMDGPU::ClipVertexAdjust: {
+    unsigned Temp[4];
+    for (unsigned i = 0; i < 4; i++) {
+      unsigned KcacheReg = AMDGPU::R600_KCache128RegClass.getRegister(i + 4 * MI->getOperand(2).getImm());
+      Temp[i] = MRI.createVirtualRegister(&AMDGPU::R600_Reg32RegClass);
+       BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::DOT4_r600_pseudo), Temp[i])
+           .addOperand(MI->getOperand(1))
+           .addReg(KcacheReg);
+    }
+    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(TargetOpcode::REG_SEQUENCE), MI->getOperand(0).getReg())
+        .addReg(Temp[0])
+        .addImm(TII->getRegisterInfo().getSubRegFromChannel(0))
+        .addReg(Temp[1])
+        .addImm(TII->getRegisterInfo().getSubRegFromChannel(1))
+        .addReg(Temp[2])
+        .addImm(TII->getRegisterInfo().getSubRegFromChannel(2))
+        .addReg(Temp[3])
+        .addImm(TII->getRegisterInfo().getSubRegFromChannel(3));
+    break;
+  }
   case AMDGPU::EG_ExportSwz:
   case AMDGPU::R600_ExportSwz: {
     bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index d89b03b..c3ffe97 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -598,6 +598,18 @@ class ExportBufInst : InstR600ISA<(
   let Inst{63-32} = Word1;
 }
 
+let usesCustomInserter = 1 in {
+
+def ClipVertexAdjust : AMDGPUInst <(outs R600_Reg128:$dst),
+  (ins R600_Reg128:$src0, i32imm:$src1),
+  "DOT4 $dst $src0",
+  [(set R600_Reg128:$dst,
+      (int_R600_clipvertex R600_Reg128:$src0, imm:$src1))]
+> {
+  field bits<64> Inst;
+}
+} // End usesCustomInserter = 1
+
 let Predicates = [isR600toCayman] in { 
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/R600Intrinsics.td b/lib/Target/AMDGPU/R600Intrinsics.td
index 3825bc4..0186f9d 100644
--- a/lib/Target/AMDGPU/R600Intrinsics.td
+++ b/lib/Target/AMDGPU/R600Intrinsics.td
@@ -19,6 +19,8 @@ let TargetPrefix = "R600", isTarget = 1 in {
     Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
   def int_R600_load_input_linear :
     Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
+  def int_R600_clipvertex :
+    Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_R600_store_stream_output :
     Intrinsic<[], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], []>;
   def int_R600_store_pixel_color :
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td b/lib/Target/AMDGPU/R600RegisterInfo.td
index 3b21825..67449d8 100644
--- a/lib/Target/AMDGPU/R600RegisterInfo.td
+++ b/lib/Target/AMDGPU/R600RegisterInfo.td
@@ -50,6 +50,19 @@ foreach Index = 448-464 in {
   def ArrayBase#Index :  R600Reg<"ARRAY_BASE", Index>;
 }
 
+foreach Index = 160-168 in {
+  foreach Chan = [ "X", "Y", "Z", "W" ] in {
+    // 32-bit KCache channel registers, printed "K<Index>.<Chan>" like K<Index>.XYZW
+    def K#Index#_#Chan : R600RegWithChan <"K"#Index#"."#Chan, Index, Chan>;
+  }
+  def K#Index#_XYZW : R600Reg_128 <"K"#Index#".XYZW",
+                                   [!cast<Register>("K"#Index#"_X"),
+                                   !cast<Register>("K"#Index#"_Y"),
+                                   !cast<Register>("K"#Index#"_Z"),
+                                   !cast<Register>("K"#Index#"_W")],
+                                   Index>;
+}
+
 // Special Registers
 
 def ZERO : R600Reg<"0.0", 248>;
@@ -117,7 +130,13 @@ def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
 def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
     PREDICATE_BIT)>;
 
+def R600_KCache128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
+                                (add (sequence "K%u_XYZW", 160, 168))> {
+  let isAllocatable = 0;
+}
+
 def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
-                                (add (sequence "T%u_XYZW", 0, 127))> {
+                                (add (sequence "T%u_XYZW", 0, 127),
+                                     R600_KCache128)> {
   let CopyCost = -1;
 }
-- 
1.8.0.2



More information about the mesa-dev mailing list