[Mesa-dev] [PATCH 2/2] radeon/llvm: add store.vertex.{position, param} intrinsics
Vincent Lejeune
vljn at ovi.com
Mon Dec 17 09:42:03 PST 2012
---
lib/Target/AMDGPU/R600ISelLowering.cpp | 51 +++++++++++++++++++++++----
lib/Target/AMDGPU/R600Instructions.td | 5 +++
lib/Target/AMDGPU/R600Intrinsics.td | 4 +++
lib/Target/AMDGPU/R600MachineFunctionInfo.cpp | 1 +
lib/Target/AMDGPU/R600MachineFunctionInfo.h | 1 +
5 files changed, 56 insertions(+), 6 deletions(-)
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 16d2280..0825172 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -386,8 +386,27 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::EG_ExportSwz:
case AMDGPU::R600_ExportSwz: {
+ // Instruction is left unmodified if it's not the last one of its type
+ bool isLastInstructionOfItsType;
+ {
+ isLastInstructionOfItsType = true;
+ unsigned InstExportType = MI->getOperand(1).getImm();
+ for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
+ EndBlock = BB->end(); NextExportInst != EndBlock;
+ NextExportInst = llvm::next(NextExportInst)) {
+ if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
+ NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
+ unsigned CurrentInstExportType = NextExportInst->getOperand(1)
+ .getImm();
+ if (CurrentInstExportType == InstExportType) {
+ isLastInstructionOfItsType = false;
+ break;
+ }
+ }
+ }
+ }
bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
- if (!EOP)
+ if (!EOP && !isLastInstructionOfItsType)
return BB;
unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
@@ -399,7 +418,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
.addOperand(MI->getOperand(5))
.addOperand(MI->getOperand(6))
.addImm(CfInst)
- .addImm(1);
+ .addImm(EOP);
break;
}
}
@@ -418,7 +437,7 @@ using namespace llvm::AMDGPUIntrinsic;
static SDValue
InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
- SDValue Scalar, SDValue Chain) {
+ unsigned ArrayBaseOffset, SDValue Scalar, SDValue Chain) {
if (!ExportMap[Slot]) {
SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
DL, MVT::v4f32,
@@ -456,7 +475,7 @@ InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
DAG.getConstant(Inst, MVT::i32),
DAG.getConstant(Type, MVT::i32),
- DAG.getConstant(Slot, MVT::i32),
+ DAG.getConstant(Slot + ArrayBaseOffset, MVT::i32),
DAG.getConstant(Mask, MVT::i32)};
DAG.UpdateNodeOperands(ExportInstruction,
@@ -500,7 +519,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
SDNode **OutputsMap = MFI->Outputs;
return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
- RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
+ RegIndex / 4, RegIndex % 4, 0, 0, 0, Op.getOperand(2),
Chain);
}
@@ -534,7 +553,27 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
}
return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
- RegIndex / 4, RegIndex % 4, Inst, 0, Op.getOperand(2),
+ RegIndex / 4, RegIndex % 4, Inst, 0, 0, Op.getOperand(2),
+ Chain);
+ }
+ case AMDGPUIntrinsic::R600_store_vertex_position: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+
+ SDNode **OutputsMap = MFI->Outputs;
+ return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
+ RegIndex / 4, RegIndex % 4, 0, 1, 60, Op.getOperand(2),
+ Chain);
+ }
+ case AMDGPUIntrinsic::R600_store_vertex_param: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+
+ SDNode **OutputsMap = MFI->VertexParamOutputs;
+ return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
+ RegIndex / 4, RegIndex % 4, 0, 2, 0, Op.getOperand(2),
Chain);
}
// default for switch(IntrinsicID)
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index 621ed09..eb59a4e 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -545,6 +545,11 @@ multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
(ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
0, 1, 2, 3, cf_inst, 0)
>;
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1),
+ (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ 0, 1, 2, 3, cf_inst, 0)
+ >;
}
multiclass SteamOutputExportPattern<Instruction ExportInst,
diff --git a/lib/Target/AMDGPU/R600Intrinsics.td b/lib/Target/AMDGPU/R600Intrinsics.td
index 3825bc4..b09dec7 100644
--- a/lib/Target/AMDGPU/R600Intrinsics.td
+++ b/lib/Target/AMDGPU/R600Intrinsics.td
@@ -19,6 +19,10 @@ let TargetPrefix = "R600", isTarget = 1 in {
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_linear :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
+ def int_R600_store_vertex_position :
+ Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
+ def int_R600_store_vertex_param :
+ Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
def int_R600_store_stream_output :
Intrinsic<[], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_R600_store_pixel_color :
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
index 93b4608..ecc98ec 100644
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
@@ -19,6 +19,7 @@ R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
HasPerspectiveInterpolation(false) {
memset(Outputs, 0, sizeof(Outputs));
memset(StreamOutputs, 0, sizeof(StreamOutputs));
+ memset(VertexParamOutputs, 0, sizeof(VertexParamOutputs));
}
unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const {
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
index 6cc875f..f7cede3 100644
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
@@ -26,6 +26,7 @@ public:
R600MachineFunctionInfo(const MachineFunction &MF);
std::vector<unsigned> ReservedRegs;
SDNode *Outputs[16];
+ SDNode *VertexParamOutputs[16];
SDNode *StreamOutputs[64][4];
BitVector IndirectChannels;
bool HasLinearInterpolation;
--
1.8.0.2
More information about the mesa-dev
mailing list