[Mesa-dev] [PATCH 1/2] radeon/llvm: support for v2f32 store/load

Fri Nov 16 08:04:38 PST 2012

---
 lib/Target/AMDGPU/R600ISelLowering.cpp | 67 +++++++++++++++++++++++++++++++---
 1 file changed, 61 insertions(+), 6 deletions(-)

diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index f2de35f..3adc5d6 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -96,15 +96,22 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
   setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
   // Legalize loads and stores to the private address space.
+  setOperationAction(ISD::LOAD, MVT::f32, Custom);
   setOperationAction(ISD::LOAD, MVT::i32, Custom);
+  setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
+  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v4f32, Custom);
   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
   setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
   setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
   setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
   setOperationAction(ISD::STORE, MVT::i8, Custom);
+  setOperationAction(ISD::STORE, MVT::f32, Custom);
   setOperationAction(ISD::STORE, MVT::i32, Custom);
+  setOperationAction(ISD::STORE, MVT::v2f32, Custom);
+  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
   setOperationAction(ISD::STORE, MVT::v4f32, Custom);
+  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
 
   setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
 
@@ -530,6 +537,20 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
   switch (N->getOpcode()) {
   default: return;
   case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+    return;
+  case ISD::LOAD: {
+    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
+    Results.push_back(SDValue(Node, 0));
+    Results.push_back(SDValue(Node, 1));
+    // XXX: LLVM does not seem to replace the chain value inside
+    // CustomWidenLowerNode, so replace its uses manually here.
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
+    return;
+  }
+  case ISD::STORE:
+    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
+    Results.push_back(SDValue(Node, 0));
+    return;
   }
 }
 
@@ -827,22 +848,41 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
   }
 
   if (VT.isVector()) {
+    unsigned NumElemVT = VT.getVectorNumElements();
     EVT ElemVT = VT.getVectorElementType();
     SDValue Loads[4];
+    unsigned SRLpad;
+
+    switch (NumElemVT) {
+    case 4:
+      SRLpad = 4;
+      break;
+    case 2:
+      SRLpad = 3;
+      break;
+    default:
+      assert(0 && "Vector size not supported");
+    }
     // LLVM generates byte-addresing pointers, but we need to convert this to a
     // register index.  Each register holds 16 bytes (4 x 32), so in order to
     // get the register index, we need to divide the pointer by 16.
     Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
-                      DAG.getConstant(4, MVT::i32));
+                      DAG.getConstant(SRLpad, MVT::i32));
 
-    for (unsigned i = 0; i < 4; ++i) {
+    for (unsigned i = 0; i < NumElemVT; ++i) {
       Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                              Chain, Ptr,
                              DAG.getTargetConstant(i, MVT::i32), // Channel
                              Op.getOperand(2));
     }
-    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Loads, 4);
+    for (unsigned i = NumElemVT; i < 4; ++i) {
+      Loads[i] = DAG.getUNDEF(ElemVT);
+    }
+    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
+    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
   } else {
+    Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
+                      DAG.getConstant(2, MVT::i32));
     LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                               Chain, Ptr,
                               DAG.getTargetConstant(0, MVT::i32), // Channel
@@ -872,16 +912,29 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const
   }
 
   if (VT.isVector()) {
+    unsigned NumElemVT = VT.getVectorNumElements();
     EVT ElemVT = VT.getVectorElementType();
     SDValue Stores[4];
+    unsigned SRLpad;
 
     // LLVM generates byte-addresing pointers, but we need to convert this to a
     // register index.  Each register holds 16 bytes (4 x 32), so in order to
     // get the register index, we need to divide the pointer by 16.
+    switch (NumElemVT) {
+    case 4:
+      SRLpad = 4;
+      break;
+    case 2:
+      SRLpad = 3;
+      break;
+    default:
+      assert(0 && "Vector size not supported");
+    }
+
     Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
-                      DAG.getConstant(4, MVT::i32));
+                      DAG.getConstant(SRLpad, MVT::i32));
 
-    for (unsigned i = 0; i < 4; ++i) {
+    for (unsigned i = 0; i < NumElemVT; ++i) {
       SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                  Value, DAG.getConstant(i, MVT::i32));
 
@@ -890,11 +943,13 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const
                               DAG.getTargetConstant(i, MVT::i32)); // Channel
       MFI->IndirectChannels.set(i);
     }
-     Chain =  DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, 4);
+     Chain =  DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
    } else {
     if (VT == MVT::i8) {
       Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
     }
+    Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
+                      DAG.getConstant(2, MVT::i32));
     Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
     DAG.getTargetConstant(0, MVT::i32)); // Channel 
     MFI->IndirectChannels.set(0);
-- 
1.7.11.7