[Mesa-dev] [PATCH 4/5] R600: use pointers for constants

Tue Jan 8 13:42:28 PST 2013

---
 lib/Target/R600/AMDGPU.h                           |   1 +
 lib/Target/R600/AMDGPUTargetMachine.cpp            |   1 +
 lib/Target/R600/AMDIL.h                            |  19 +++-
 lib/Target/R600/AMDILISelDAGToDAG.cpp              | 104 +++++----------------
 lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp |   3 +-
 lib/Target/R600/R600ISelLowering.cpp               | 103 ++++++++++++++++++++
 lib/Target/R600/R600ISelLowering.h                 |   1 +
 lib/Target/R600/R600Instructions.td                |  74 +++++++++++++++
 lib/Target/R600/R600LowerConstCopy.cpp             |  74 +++++++++++++++
 9 files changed, 296 insertions(+), 84 deletions(-)
 create mode 100644 lib/Target/R600/R600LowerConstCopy.cpp

diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 0f5125d..22351bf 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -23,6 +23,7 @@ class AMDGPUTargetMachine;
 // R600 Passes
 FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
 FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+FunctionPass *createR600LowerConstCopy(TargetMachine &tm);
 
 // SI Passes
 FunctionPass *createSIAnnotateControlFlowPass();
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index d09dc2e..ad0d434 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -131,6 +131,7 @@ bool AMDGPUPassConfig::addPreEmitPass() {
     addPass(createAMDGPUCFGPreparationPass(*TM));
     addPass(createAMDGPUCFGStructurizerPass(*TM));
     addPass(createR600ExpandSpecialInstrsPass(*TM));
+    addPass(createR600LowerConstCopy(*TM));
     addPass(&FinalizeMachineBundlesID);
   } else {
     addPass(createSILowerLiteralConstantsPass(*TM));
diff --git a/lib/Target/R600/AMDIL.h b/lib/Target/R600/AMDIL.h
index 4e577dc..dee51bc 100644
--- a/lib/Target/R600/AMDIL.h
+++ b/lib/Target/R600/AMDIL.h
@@ -90,14 +90,29 @@ namespace AMDGPUAS {
 enum AddressSpaces {
   PRIVATE_ADDRESS  = 0, ///< Address space for private memory.
   GLOBAL_ADDRESS   = 1, ///< Address space for global memory (RAT0, VTX0).
-  CONSTANT_ADDRESS = 2, ///< Address space for constant memory.
+  CONSTANT_ADDRESS = 2, ///< Address space for constant memory (aka CONSTANT_BUFFER_0)
   LOCAL_ADDRESS    = 3, ///< Address space for local memory.
   REGION_ADDRESS   = 4, ///< Address space for region memory.
   ADDRESS_NONE     = 5, ///< Address space for unknown memory.
   PARAM_D_ADDRESS  = 6, ///< Address space for direct addressible parameter memory (CONST0)
   PARAM_I_ADDRESS  = 7, ///< Address space for indirect addressible parameter memory (VTX1)
   USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
-  LAST_ADDRESS     = 9
+  CONSTANT_BUFFER_1 = 9, ///< Address space for constant buffer 1.
+  CONSTANT_BUFFER_2 = 10, ///< Address space for constant buffer 2.
+  CONSTANT_BUFFER_3 = 11, ///< Address space for constant buffer 3.
+  CONSTANT_BUFFER_4 = 12, ///< Address space for constant buffer 4.
+  CONSTANT_BUFFER_5 = 13, ///< Address space for constant buffer 5.
+  CONSTANT_BUFFER_6 = 14, ///< Address space for constant buffer 6.
+  CONSTANT_BUFFER_7 = 15, ///< Address space for constant buffer 7.
+  CONSTANT_BUFFER_8 = 16, ///< Address space for constant buffer 8.
+  CONSTANT_BUFFER_9 = 17, ///< Address space for constant buffer 9.
+  CONSTANT_BUFFER_10 = 18, ///< Address space for constant buffer 10.
+  CONSTANT_BUFFER_11 = 19, ///< Address space for constant buffer 11.
+  CONSTANT_BUFFER_12 = 20, ///< Address space for constant buffer 12.
+  CONSTANT_BUFFER_13 = 21, ///< Address space for constant buffer 13.
+  CONSTANT_BUFFER_14 = 22, ///< Address space for constant buffer 14.
+  CONSTANT_BUFFER_15 = 23, ///< Address space for constant buffer 15.
+  LAST_ADDRESS     = 24
 };
 
 } // namespace AMDGPUAS
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index 725a5e4..8fbf153 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/CodeGen/SelectionDAG.h"
 #include <list>
 #include <queue>
 
@@ -67,6 +68,9 @@ private:
   static bool isLocalLoad(const LoadSDNode *N);
   static bool isRegionLoad(const LoadSDNode *N);
 
+  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
+  bool SelectGlobalValueVariableOffset(SDValue Addr,
+      SDValue &BaseReg, SDValue& Offset);
   bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
   bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
   bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
@@ -258,87 +262,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
     }
     break;
   }
-
-  case ISD::INTRINSIC_WO_CHAIN: {
-    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
-    if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
-      unsigned IntrinsicID = N->getConstantOperandVal(0);
-      switch (IntrinsicID) {
-      case AMDGPUIntrinsic::AMDGPU_load_const: {
-        const R600InstrInfo *TII =
-            static_cast<const R600InstrInfo*>(CurDAG->getTarget().getInstrInfo());
-        int CSel = N->getConstantOperandVal(1);
-        std::vector<SDValue> Ops;
-
-        // We'll try to fold the const in the alu insts with native operands
-        for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
-            Use != SDNode::use_end(); Use = Next) {
-          Next = llvm::next(Use);
-
-          if (!Use->isMachineOpcode())
-            continue;
-
-          unsigned Opcode = Use->getMachineOpcode();
-
-          const MCInstrDesc & MCDesc = TII->get(Opcode);
-          if (HAS_NATIVE_OPERANDS(MCDesc.TSFlags)) {
-
-            unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
-                MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
-
-            const unsigned SrcOps[3][2] = {
-                {R600Operands::SRC0, R600Operands::SRC0_SEL},
-                {R600Operands::SRC1, R600Operands::SRC1_SEL},
-                {R600Operands::SRC2, R600Operands::SRC2_SEL}
-            };
-
-            Ops.clear();
-            for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
-              Ops.push_back(Use->getOperand(i));
-            }
-
-            int RegOpIdx = Use.getOperandNo();
-            unsigned SrcIdx;
-
-            for (SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
-              if (R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]] ==
-                  RegOpIdx + 1)
-                break;
-            }
-
-            assert(SrcIdx < SrcNum && "ALU const folding: invalid operand");
-
-            int SelOpIdx =
-                R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]] - 1;
-
-            Ops[RegOpIdx] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
-            Ops[SelOpIdx] = CurDAG->getTargetConstant(CSel, MVT::i32);
-
-            CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
-          }
-        }
-
-        Ops.clear();
-        Ops.push_back(CurDAG->getTargetConstant(1, MVT::i32));
-        Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32));
-        Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32));
-        Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32));
-        Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
-        Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32));
-        Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32));
-        Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32));
-        Ops.push_back(CurDAG->getTargetConstant(CSel, MVT::i32));
-        Ops.push_back(CurDAG->getTargetConstant(1, MVT::i32));
-        Ops.push_back(CurDAG->getRegister(AMDGPU::PRED_SEL_OFF, MVT::f32));
-        Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32));
-
-        return CurDAG->SelectNodeTo(N, AMDGPU::MOV, MVT::f32, Ops.data(),
-                Ops.size());
-
-      }
-      }
-    }
-    }
   }
   return SelectCode(N);
 }
@@ -487,6 +410,25 @@ const char *AMDGPUDAGToDAGISel::getPassName() const {
 
 ///==== AMDGPU Functions ====///
 
+bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+    SDValue& IntPtr) {
+  // Matches only constant addresses; the operand is the address divided by 4.
+  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr);
+  if (!Cst) return false;
+  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
+  return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+    SDValue& BaseReg, SDValue &Offset) {
+  // Constant addresses are rejected; any other address is the base, offset 0.
+  if (isa<ConstantSDNode>(Addr))
+    return false;
+  BaseReg = Addr;
+  Offset = CurDAG->getIntPtrConstant(0, true);
+  return true;
+}
+
 bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
                                              SDValue& Offset) {
   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index e357598..e061b18 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -164,7 +164,8 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
     case AMDGPU::VTX_READ_PARAM_32_eg:
     case AMDGPU::VTX_READ_GLOBAL_8_eg:
     case AMDGPU::VTX_READ_GLOBAL_32_eg:
-    case AMDGPU::VTX_READ_GLOBAL_128_eg: {
+    case AMDGPU::VTX_READ_GLOBAL_128_eg:
+    case AMDGPU::TEX_VTX_CONSTBUF: {
       uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
       uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
 
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index cd6170e..6a3c57d 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -74,6 +74,10 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::STORE, MVT::i32, Custom);
   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
 
+  setOperationAction(ISD::LOAD, MVT::f32, Custom);
+  setOperationAction(ISD::LOAD, MVT::i32, Custom);
+  setOperationAction(ISD::LOAD, MVT::v4f32, Custom);
+  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
   setTargetDAGCombine(ISD::FP_ROUND);
 
   setSchedulingPreference(Sched::VLIW);
@@ -355,6 +359,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
   case ISD::SELECT: return LowerSELECT(Op, DAG);
   case ISD::SETCC: return LowerSETCC(Op, DAG);
   case ISD::STORE: return LowerSTORE(Op, DAG);
+  case ISD::LOAD: return LowerLOAD(Op, DAG);
   case ISD::FPOW: return LowerFPOW(Op, DAG);
   case ISD::INTRINSIC_VOID: {
     SDValue Chain = Op.getOperand(0);
@@ -518,6 +523,16 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
   switch (N->getOpcode()) {
   default: return;
   case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+    return;
+  case ISD::LOAD: {
+    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
+    Results.push_back(SDValue(Node, 0));
+    Results.push_back(SDValue(Node, 1));
+    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
+    // function
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
+    return;
+  }
   }
 }
 
@@ -823,6 +838,94 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
+// Returns 512 + (kc_bank << 12) for constant address spaces, or -1 otherwise.
+static int
+ConstantAddressBlock(unsigned AddressSpace) {
+  switch (AddressSpace) {
+  case AMDGPUAS::CONSTANT_ADDRESS:
+    return 512;
+  case AMDGPUAS::CONSTANT_BUFFER_1:
+    return 512 + 4096;
+  case AMDGPUAS::CONSTANT_BUFFER_2:
+    return 512 + 4096 * 2;
+  case AMDGPUAS::CONSTANT_BUFFER_3:
+    return 512 + 4096 * 3;
+  case AMDGPUAS::CONSTANT_BUFFER_4:
+    return 512 + 4096 * 4;
+  case AMDGPUAS::CONSTANT_BUFFER_5:
+    return 512 + 4096 * 5;
+  case AMDGPUAS::CONSTANT_BUFFER_6:
+    return 512 + 4096 * 6;
+  case AMDGPUAS::CONSTANT_BUFFER_7:
+    return 512 + 4096 * 7;
+  case AMDGPUAS::CONSTANT_BUFFER_8:
+    return 512 + 4096 * 8;
+  case AMDGPUAS::CONSTANT_BUFFER_9:
+    return 512 + 4096 * 9;
+  case AMDGPUAS::CONSTANT_BUFFER_10:
+    return 512 + 4096 * 10;
+  case AMDGPUAS::CONSTANT_BUFFER_11:
+    return 512 + 4096 * 11;
+  case AMDGPUAS::CONSTANT_BUFFER_12:
+    return 512 + 4096 * 12;
+  case AMDGPUAS::CONSTANT_BUFFER_13:
+    return 512 + 4096 * 13;
+  case AMDGPUAS::CONSTANT_BUFFER_14:
+    return 512 + 4096 * 14;
+  case AMDGPUAS::CONSTANT_BUFFER_15:
+    return 512 + 4096 * 15;
+  default:
+    return -1; // Not a constant-buffer address space.
+  }
+}
+
+// Lowers constant-buffer loads to CONST_ADDRESS nodes; SDValue() otherwise.
+SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
+{
+  EVT VT = Op.getValueType();
+  DebugLoc DL = Op.getDebugLoc();
+  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+  SDValue Chain = Op.getOperand(0);
+  SDValue Ptr = Op.getOperand(1);
+
+  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
+  if (ConstantBlock > -1) {
+    SDValue Result;
+    // ConstantExpr is a Constant, so one test suffices; SrcValue may be null.
+    if (dyn_cast_or_null<Constant>(LoadNode->getSrcValue())) {
+      SDValue Slots[4];
+      for (unsigned i = 0; i < 4; i++) {
+        // We want the const position encoded with the following formula:
+        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
+        // const_index is Ptr computed by llvm using an alignment of 16.
+        // Thus we add (((512 + (kc_bank << 12)) << 2) + chan) * 4 here and
+        // then divide by 4 at the ISel step.
+        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
+        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::f32, NewPtr);
+      }
+      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, Slots, 4);
+    } else {
+      // Ptr is GA + Reg: it can't be folded, so keep it as a v4f32 load.
+      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4f32,
+          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32))
+          );
+    }
+
+    if (!VT.isVector()) {
+      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Result,
+          DAG.getConstant(0, MVT::i32));
+    }
+
+    SDValue MergedValues[2] = {
+        Result,
+        Chain
+    };
+    return DAG.getMergeValues(MergedValues, 2, DL);
+  }
+
+  return SDValue();
+}
 
 SDValue R600TargetLowering::LowerFPOW(SDValue Op,
     SelectionDAG &DAG) const {
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index 2b954da..c141d50 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -63,6 +63,7 @@ private:
   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
   
   bool isZero(SDValue Op) const;
 };
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 41afb06..372ed6f 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -94,6 +94,8 @@ def LAST : InstFlag<"printLast", 1>;
 def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
 def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
 def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
+def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
+def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
 
 class R600ALU_Word0 {
   field bits<32> Word0;
@@ -1572,6 +1574,78 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
       "RETURN", [(IL_retflag)]>;
 }
 
+
+//===----------------------------------------------------------------------===//
+// Constant Buffer Addressing Support
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"  in {
+def CONST_COPY : Instruction {  // Pseudo; R600LowerConstCopy turns it into MOV.
+  let OutOperandList = (outs R600_Reg32:$dst);
+  let InOperandList = (ins i32imm:$src);  // Constant address divided by 4.
+  let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
+  let AsmString = "CONST_COPY";
+  let neverHasSideEffects = 1;
+  let isAsCheapAsAMove = 1;
+  let Itinerary = NullALU;
+}
+} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
+
+def TEX_VTX_CONSTBUF :  // Matches CONST_ADDRESS with a non-constant address.
+  InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr",
+      [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>,
+  VTX_WORD1_GPR, VTX_WORD0 {
+
+  let VC_INST = 0;
+  let FETCH_TYPE = 2;
+  let FETCH_WHOLE_QUAD = 0;
+  let BUFFER_ID = 0;
+  let SRC_REL = 0;
+  let SRC_SEL_X = 0;
+  let DST_REL = 0;
+  let USE_CONST_FIELDS = 0;
+  let NUM_FORMAT_ALL = 2;
+  let FORMAT_COMP_ALL = 1;
+  let SRF_MODE_ALL = 1;
+  let MEGA_FETCH_COUNT = 16;
+  let DST_SEL_X        = 0;
+  let DST_SEL_Y        = 1;
+  let DST_SEL_Z        = 2;
+  let DST_SEL_W        = 3;
+  let DATA_FORMAT      = 35;
+
+  let Inst{31-0} = Word0;
+  let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600MCCodeEmitter.
+//
+// bits<16> OFFSET;
+// bits<2>  ENDIAN_SWAP = 0;
+// bits<1>  CONST_BUF_NO_STRIDE = 0;
+// bits<1>  MEGA_FETCH = 0;
+// bits<1>  ALT_CONST = 0;
+// bits<2>  BUFFER_INDEX_MODE = 0;
+
+
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600MCCodeEmitter)
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82}    = CONST_BUF_NO_STRIDE;
+// Inst{83}    = MEGA_FETCH;
+// Inst{84}    = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-87} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
+
 //===--------------------------------------------------------------------===//
 // Instructions support
 //===--------------------------------------------------------------------===//
diff --git a/lib/Target/R600/R600LowerConstCopy.cpp b/lib/Target/R600/R600LowerConstCopy.cpp
new file mode 100644
index 0000000..d14ae20
--- /dev/null
+++ b/lib/Target/R600/R600LowerConstCopy.cpp
@@ -0,0 +1,74 @@
+//===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to MOV---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass handles the remaining CONST_COPY pseudo MachineInstrs.
+/// ISel will fold each constant buffer read into scalar ALU instructions.
+/// However, it cannot fold them into vector instructions such as DOT4 or CUBE;
+/// ISel emits CONST_COPY instead. This pass (executed after
+/// R600ExpandSpecialInstrs) will fold them if possible, or use MOV otherwise.
+/// TODO: Implement the folding part, using a copy propagation algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "R600InstrInfo.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+class R600LowerConstCopy : public MachineFunctionPass { // CONST_COPY -> MOV
+private:
+  static char ID;           // Pass identification, given to MachineFunctionPass.
+  const R600InstrInfo *TII; // Set from tm.getInstrInfo() in the constructor.
+public:
+  R600LowerConstCopy(TargetMachine &tm);
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+  // The name describes this pass rather than the one it was copied from.
+  const char *getPassName() const { return "R600 Lower Const Copy"; }
+};
+
+char R600LowerConstCopy::ID = 0;
+
+
+R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm)
+    : MachineFunctionPass(ID),
+      // The downcast assumes tm hands back an R600InstrInfo.
+      TII(static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {
+}
+
+// Rewrites every CONST_COPY into a MOV reading ALU_CONST; true if any changed.
+bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                                  BB != BB_E; ++BB) {
+    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+      MachineInstr &MI = *I++; // Step past MI first: it may be erased below.
+      if (MI.getOpcode() != AMDGPU::CONST_COPY)
+        continue;
+      MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
+          MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
+      NewMI->getOperand(9).setImm(MI.getOperand(1).getImm());
+      MI.eraseFromParent();
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+
+FunctionPass *createR600LowerConstCopy(TargetMachine &tm) {
+  return new R600LowerConstCopy(tm); // Caller receives a newly allocated pass.
+}
+
+}
+
+
-- 
1.8.0.1