[Mesa-dev] [PATCH 3/3] radeon/llvm: Add an intrinsic to handle stream outputs.
Vincent Lejeune
vljn at ovi.com
Sat Dec 8 09:10:19 PST 2012
---
.../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 2 +
lib/Target/AMDGPU/R600ISelLowering.cpp | 31 +++++++++++
lib/Target/AMDGPU/R600Instructions.td | 65 ++++++++++++++++++++++
lib/Target/AMDGPU/R600Intrinsics.td | 2 +
lib/Target/AMDGPU/R600MachineFunctionInfo.cpp | 1 +
lib/Target/AMDGPU/R600MachineFunctionInfo.h | 1 +
6 files changed, 102 insertions(+)
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index cb89ebe..018234a 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -176,6 +176,8 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
}
case AMDGPU::EG_ExportSwz:
case AMDGPU::R600_ExportSwz:
+ case AMDGPU::EG_ExportBuf:
+ case AMDGPU::R600_ExportBuf: {
uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
EmitByte(INSTR_EXPORT, OS);
Emit(Inst, OS);
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 1b6d909..3a4283c 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -448,8 +448,39 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
Chain);
+ }
+ case AMDGPUIntrinsic::R600_store_stream_output : {
+ MachineFunction &MF = DAG.getMachineFunction();
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ int64_t BufIndex = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+
+ SDNode **OutputsMap = MFI->StreamOutputs[BufIndex];
+ unsigned Inst;
+ switch (cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue() ) {
+ // STREAM3
+ case 3:
+ Inst = 4;
+ break;
+ // STREAM2
+ case 2:
+ Inst = 3;
+ break;
+ // STREAM1
+ case 1:
+ Inst = 2;
+ break;
+ // STREAM0
+ case 0:
+ Inst = 1;
+ break;
+ default:
+ assert(0 && "Wrong buffer id for stream outputs !");
}
+ return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
+ RegIndex / 4, RegIndex % 4, Inst, 0, Op.getOperand(2),
+ Chain);
}
// default for switch(IntrinsicID)
default: break;
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index 7c1df72..d89b03b 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -510,6 +510,18 @@ class ExportSwzWord1 {
let Word1{11-9} = sw_w;
}
+class ExportBufWord1 {
+ field bits<32> Word1;
+
+ bits<12> arraySize;
+ bits<4> compMask;
+ bits<1> eop;
+ bits<8> inst;
+
+ let Word1{11-0} = arraySize;
+ let Word1{15-12} = compMask;
+}
+
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
(ExportInst
@@ -535,6 +547,30 @@ multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
>;
}
+multiclass SteamOutputExportPattern<Instruction ExportInst,
+ bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
+// Stream0
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1),
+ (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ 4095, imm:$mask, buf0inst, 0)>;
+// Stream1
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 2),
+ (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ 4095, imm:$mask, buf1inst, 0)>;
+// Stream2
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 3),
+ (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ 4095, imm:$mask, buf2inst, 0)>;
+// Stream3
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 4),
+ (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ 4095, imm:$mask, buf3inst, 0)>;
+}
+
let isTerminator = 1, usesCustomInserter = 1 in {
class ExportSwzInst : InstR600ISA<(
@@ -551,6 +587,16 @@ class ExportSwzInst : InstR600ISA<(
} // End isTerminator = 1, usesCustomInserter = 1
+class ExportBufInst : InstR600ISA<(
+ outs),
+ (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
+ i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
+ !strconcat("EXPORT", " $gpr"),
+ []>, ExportWord0, ExportBufWord1 {
+ let elem_size = 0;
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
let Predicates = [isR600toCayman] in {
@@ -1001,6 +1047,15 @@ let Predicates = [isR600] in {
let Word1{31} = 1; // BARRIER
}
defm : ExportPattern<R600_ExportSwz, 39>;
+
+ def R600_ExportBuf : ExportBufInst {
+ let Word1{20-17} = 1; // BURST_COUNT
+ let Word1{21} = eop;
+ let Word1{22} = 1; // VALID_PIXEL_MODE
+ let Word1{30-23} = inst;
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
}
// Helper pattern for normalizing inputs to triginomic instructions for R700+
@@ -1145,6 +1200,16 @@ let hasSideEffects = 1 in {
}
defm : ExportPattern<EG_ExportSwz, 83>;
+ def EG_ExportBuf : ExportBufInst {
+ let Word1{19-16} = 1; // BURST_COUNT
+ let Word1{20} = 1; // VALID_PIXEL_MODE
+ let Word1{21} = eop;
+ let Word1{29-22} = inst;
+ let Word1{30} = 0; // MARK
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
+
//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/R600Intrinsics.td b/lib/Target/AMDGPU/R600Intrinsics.td
index 3a7e3ee..3825bc4 100644
--- a/lib/Target/AMDGPU/R600Intrinsics.td
+++ b/lib/Target/AMDGPU/R600Intrinsics.td
@@ -19,6 +19,8 @@ let TargetPrefix = "R600", isTarget = 1 in {
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_linear :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
+ def int_R600_store_stream_output :
+ Intrinsic<[], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_R600_store_pixel_color :
Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
def int_R600_store_pixel_depth :
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
index 1885016..93b4608 100644
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
@@ -18,6 +18,7 @@ R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
HasLinearInterpolation(false),
HasPerspectiveInterpolation(false) {
memset(Outputs, 0, sizeof(Outputs));
+ memset(StreamOutputs, 0, sizeof(StreamOutputs));
}
unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const {
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
index 7bc7d51..6cc875f 100644
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
@@ -26,6 +26,7 @@ public:
R600MachineFunctionInfo(const MachineFunction &MF);
std::vector<unsigned> ReservedRegs;
SDNode *Outputs[16];
+ SDNode *StreamOutputs[64][4];
BitVector IndirectChannels;
bool HasLinearInterpolation;
bool HasPerspectiveInterpolation;
--
1.8.0.1
More information about the mesa-dev
mailing list