[Mesa-dev] [PATCH] radeon/llvm: support for f32, v2f32, v3f32 store/load
Vincent Lejeune
vljn at ovi.com
Sun Nov 11 13:22:41 PST 2012
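The private-memory lowering previously marked only i32/i8 and the 4-element
vector types as Custom; this adds the f32, v2f32/v2i32 (and, via type
splitting, v3f32) cases. Per-element REGISTER_LOAD/REGISTER_STORE nodes are
now emitted only for the elements the type actually has, loads are padded
with undef channels up to a full 4-element vector, and a scalar store derives
its channel from the low bits of the byte address, so the trailing element of
a v3f32 lands in the right slot. The channel operand becomes a plain constant
(timm -> imm in the patterns), and the data layout now gives scalar and small
vector types 128-bit ABI alignment to match the 128-bit registers.

As a rough sketch of the address math the lowering performs (the helper names
below are made up for illustration and are not part of the patch):

    // Each 128-bit register holds 16 bytes (4 x 32-bit channels).
    static unsigned regIndexForByteAddr(unsigned ByteAddr) {
      return ByteAddr >> 4;        // divide by 16 -> register index
    }
    static unsigned channelForByteAddr(unsigned ByteAddr) {
      return (ByteAddr & 15) >> 2; // byte offset within register -> channel
    }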
---
lib/Target/AMDGPU/AMDILDevice.cpp | 4 +-
lib/Target/AMDGPU/R600ISelLowering.cpp | 69 ++++++++++++++++++++++++----------
lib/Target/AMDGPU/R600Instructions.td | 4 +-
3 files changed, 54 insertions(+), 23 deletions(-)
diff --git a/lib/Target/AMDGPU/AMDILDevice.cpp b/lib/Target/AMDGPU/AMDILDevice.cpp
index 3955828..b440aa6 100644
--- a/lib/Target/AMDGPU/AMDILDevice.cpp
+++ b/lib/Target/AMDGPU/AMDILDevice.cpp
@@ -129,8 +129,8 @@ std::string
AMDGPUDevice::getDataLayout() const
{
return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
- "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
- "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+ "-i32:128:128-i64:128:128-f32:128:128-f64:128:128-f80:128:128"
+ "-v16:16:16-v24:32:32-v32:128:128-v48:128:128-v64:128:128"
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
"-v512:512:512-v1024:1024:1024-v2048:2048:2048"
"-n8:16:32:64");
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 712dd3f..e6418b2 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -94,15 +94,22 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
// Legalize loads and stores to the private address space.
+ setOperationAction(ISD::LOAD, MVT::f32, Custom);
setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4f32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
setOperationAction(ISD::STORE, MVT::i8, Custom);
+ setOperationAction(ISD::STORE, MVT::f32, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v2f32, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4f32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
@@ -522,6 +529,17 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default: return;
case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+ case ISD::LOAD: {
+ SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
+ Results.push_back(SDValue(Node, 0));
+ Results.push_back(SDValue(Node, 1));
+ return;
+ }
+ case ISD::STORE: {
+ SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
+ Results.push_back(SDValue(Node, 0));
+ Results.push_back(SDValue(Node, 1));
+ return;
+ }
}
}
@@ -818,26 +836,32 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
return SDValue();
}
+ // LLVM generates byte-addressing pointers, but we need to convert them to a
+ // register index. Each register holds 16 bytes (4 x 32-bit), so in order to
+ // get the register index, we need to divide the pointer by 16.
+ Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, MVT::i32));
+
if (VT.isVector()) {
+ unsigned NumElemVT = VT.getVectorNumElements();
EVT ElemVT = VT.getVectorElementType();
SDValue Loads[4];
- // LLVM generates byte-addresing pointers, but we need to convert this to a
- // register index. Each register holds 16 bytes (4 x 32), so in order to
- // get the register index, we need to divide the pointer by 16.
- Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(4, MVT::i32));
- for (unsigned i = 0; i < 4; ++i) {
+ for (unsigned i = 0; i < NumElemVT; ++i) {
Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
Chain, Ptr,
- DAG.getTargetConstant(i, MVT::i32), // Channel
+ DAG.getConstant(i, MVT::i32), // Channel
Op.getOperand(2));
}
- LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Loads, 4);
+ for (unsigned i = NumElemVT; i < 4; ++i) {
+ Loads[i] = DAG.getUNDEF(ElemVT);
+ }
+ EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
+ LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
} else {
LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
Chain, Ptr,
- DAG.getTargetConstant(0, MVT::i32), // Channel
+ DAG.getConstant(0, MVT::i32), // Channel
Op.getOperand(2));
}
@@ -863,32 +887,39 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const
return SDValue();
}
+ // LLVM generates byte-addressing pointers, but we need to convert them to a
+ // register index. Each register holds 16 bytes (4 x 32-bit), so in order to
+ // get the register index, we need to divide the pointer by 16.
+ Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, MVT::i32));
+
if (VT.isVector()) {
+ unsigned NumElemVT = VT.getVectorNumElements();
EVT ElemVT = VT.getVectorElementType();
SDValue Stores[4];
- // LLVM generates byte-addresing pointers, but we need to convert this to a
- // register index. Each register holds 16 bytes (4 x 32), so in order to
- // get the register index, we need to divide the pointer by 16.
- Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(4, MVT::i32));
-
- for (unsigned i = 0; i < 4; ++i) {
+ for (unsigned i = 0; i < NumElemVT; ++i) {
SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Value, DAG.getConstant(i, MVT::i32));
Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
Chain, Elem, Ptr,
- DAG.getTargetConstant(i, MVT::i32)); // Channel
+ DAG.getConstant(i, MVT::i32)); // Channel
MFI->IndirectChannels.set(i);
}
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, 4);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
} else {
if (VT == MVT::i8) {
Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
}
+ // We can get here with f32 elements coming from a v3f32. Their addresses
+ // are not necessarily 16-byte aligned, so derive the channel from them.
+ SDValue Channel = DAG.getNode(ISD::AND, DL, MVT::i32, Op.getOperand(2),
+ DAG.getConstant(15, MVT::i32));
+ Channel = DAG.getNode(ISD::SRL, DL, MVT::i32, Channel,
+ DAG.getConstant(2, MVT::i32));
Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
- DAG.getTargetConstant(0, MVT::i32)); // Channel
+ Channel); // Channel
MFI->IndirectChannels.set(0);
}
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index d081824..09183e8 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -1389,14 +1389,14 @@ class RegisterLoad <ValueType vt> : InstR600 <0x0,
(outs R600_Reg32:$dst), (ins FRAMEri:$addr, i32imm:$chan),
"RegisterLoad $dst, $addr",
[(set (vt R600_Reg32:$dst), (REGISTER_LOAD ADDRIndirect:$addr,
- (i32 timm:$chan)))],
+ (i32 imm:$chan)))],
NullALU
>;
class RegisterStore <ValueType vt> : InstR600 <0x0,
(outs), (ins R600_Reg32:$val, FRAMEri:$addr, i32imm:$chan),
"RegisterStore_i32 $val, $addr",
- [(REGISTER_STORE (vt R600_Reg32:$val), ADDRIndirect:$addr, (i32 timm:$chan))],
+ [(REGISTER_STORE (vt R600_Reg32:$val), ADDRIndirect:$addr, (i32 imm:$chan))],
NullALU
>;
--
1.7.11.7