[Mesa-dev] [PATCH 1/2] radeon/llvm: add an intrinsic converting clipvertex to clipdistance
Vincent Lejeune
vljn at ovi.com
Sun Dec 16 12:01:15 PST 2012
---
lib/Target/AMDGPU/R600ISelLowering.cpp | 21 ++++++++++++++++++++-
lib/Target/AMDGPU/R600Instructions.td | 12 ++++++++++++
lib/Target/AMDGPU/R600Intrinsics.td | 2 ++
lib/Target/AMDGPU/R600RegisterInfo.td | 21 ++++++++++++++++++++-
4 files changed, 54 insertions(+), 2 deletions(-)
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 3a4283c..6c594cc 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -328,7 +328,26 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
return BB;
}
-
+ case AMDGPU::ClipVertexAdjust: { // custom expansion: four DOT4s against KCache registers, packed into operand 0
+ unsigned Temp[4]; // one virtual register per result channel, filled by the loop below
+ for (unsigned i = 0; i < 4; i++) { // emit one DOT4_r600_pseudo per channel
+ unsigned KcacheReg = AMDGPU::R600_KCache128RegClass.getRegister(i + 4 * MI->getOperand(2).getImm()); // i-th register of the group of four selected by the immediate in operand 2
+ Temp[i] = MRI.createVirtualRegister(&AMDGPU::R600_Reg32RegClass); // fresh virtual register in R600_Reg32 for this channel
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::DOT4_r600_pseudo), Temp[i])
+ .addOperand(MI->getOperand(1)) // first DOT4 input: operand 1 of the pseudo
+ .addReg(KcacheReg); // second DOT4 input: the selected KCache register
+ } // NOTE(review): i + 4 * imm is not range-checked here; confirm it always stays inside R600_KCache128
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(TargetOpcode::REG_SEQUENCE), MI->getOperand(0).getReg()) // assemble the four results into the destination register
+ .addReg(Temp[0])
+ .addImm(TII->getRegisterInfo().getSubRegFromChannel(0)) // sub-register index for channel 0
+ .addReg(Temp[1])
+ .addImm(TII->getRegisterInfo().getSubRegFromChannel(1)) // sub-register index for channel 1
+ .addReg(Temp[2])
+ .addImm(TII->getRegisterInfo().getSubRegFromChannel(2)) // sub-register index for channel 2
+ .addReg(Temp[3])
+ .addImm(TII->getRegisterInfo().getSubRegFromChannel(3)); // sub-register index for channel 3
+ break;
+ }
case AMDGPU::EG_ExportSwz:
case AMDGPU::R600_ExportSwz: {
bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index d89b03b..c3ffe97 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -598,6 +598,18 @@ class ExportBufInst : InstR600ISA<(
let Inst{63-32} = Word1;
}
+let usesCustomInserter = 1 in { // expanded by the ClipVertexAdjust case in R600TargetLowering::EmitInstrWithCustomInserter
+
+def ClipVertexAdjust : AMDGPUInst <(outs R600_Reg128:$dst), // pseudo selected for the int_R600_clipvertex intrinsic
+ (ins R600_Reg128:$src0, i32imm:$src1), // $src1: immediate the custom inserter multiplies by 4 to index R600_KCache128
+ "DOT4 $dst $src0",
+ [(set R600_Reg128:$dst,
+ (int_R600_clipvertex R600_Reg128:$src0, imm:$src1))] // selection pattern: intrinsic call with an immediate second argument
+> {
+ field bits<64> Inst;
+}
+} // End usesCustomInserter = 1
+
let Predicates = [isR600toCayman] in {
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/R600Intrinsics.td b/lib/Target/AMDGPU/R600Intrinsics.td
index 3825bc4..0186f9d 100644
--- a/lib/Target/AMDGPU/R600Intrinsics.td
+++ b/lib/Target/AMDGPU/R600Intrinsics.td
@@ -19,6 +19,8 @@ let TargetPrefix = "R600", isTarget = 1 in {
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_linear :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
+ def int_R600_clipvertex : // takes (v4f32, i32) and returns v4f32; matched by the ClipVertexAdjust pattern
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_R600_store_stream_output :
Intrinsic<[], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_R600_store_pixel_color :
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td b/lib/Target/AMDGPU/R600RegisterInfo.td
index 3b21825..67449d8 100644
--- a/lib/Target/AMDGPU/R600RegisterInfo.td
+++ b/lib/Target/AMDGPU/R600RegisterInfo.td
@@ -50,6 +50,19 @@ foreach Index = 448-464 in {
def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>;
}
+foreach Index = 160-168 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit KCache registers, one per channel (printed as K<Index>.<Chan>)
+ def K#Index#_#Chan : R600RegWithChan <"K"#Index#"."#Chan, Index, Chan>;
+ }
+ def K#Index#_XYZW : R600Reg_128 <"K"#Index#".XYZW", // 128-bit register grouping the four channels above
+ [!cast<Register>("K"#Index#"_X"),
+ !cast<Register>("K"#Index#"_Y"),
+ !cast<Register>("K"#Index#"_Z"),
+ !cast<Register>("K"#Index#"_W")],
+ Index>;
+}
+
// Special Registers
def ZERO : R600Reg<"0.0", 248>;
@@ -117,7 +130,13 @@ def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
PREDICATE_BIT)>;
+def R600_KCache128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, // class of the 128-bit K<Index>_XYZW registers
+ (add (sequence "K%u_XYZW", 160, 168))> { // members K160_XYZW through K168_XYZW
+ let isAllocatable = 0; // the register allocator must not hand these out
+}
+
def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
- (add (sequence "T%u_XYZW", 0, 127))> {
+ (add (sequence "T%u_XYZW", 0, 127),
+ R600_KCache128)> { // R600_Reg128 now also contains every R600_KCache128 register
let CopyCost = -1;
}
--
1.8.0.2
More information about the mesa-dev
mailing list