[Mesa-dev] [PATCH] R600: rework handling of the constants

Thu Dec 20 16:00:39 PST 2012

From: Vadim Girlin <vadimgirlin at gmail.com>

Remove the Cxxx registers and add a new special register, "ALU_CONST",
together with a new operand, "sel", for each ALU source. When a source
register is ALU_CONST, the new operand holds the value that the driver uses
to override src.sel, src.kc_bank and src.chan for constants.

v2[Vincent Lejeune]:
 - Parse the pointer semantics of loads from the constant address space
 - Constant values are folded into the instructions that use them at the
   ISel stage
---
 lib/Target/AMDGPU/AMDGPU.h                         |   1 +
 lib/Target/AMDGPU/AMDGPUISelLowering.cpp           |   1 +
 lib/Target/AMDGPU/AMDGPUISelLowering.h             |   1 +
 lib/Target/AMDGPU/AMDGPUTargetMachine.cpp          |   1 +
 lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp            |  50 +++++++++-
 .../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp      |  78 ++++++++--------
 lib/Target/AMDGPU/R600Defines.h                    |  15 +++
 lib/Target/AMDGPU/R600ISelLowering.cpp             | 102 ++++++++++++++++++++-
 lib/Target/AMDGPU/R600ISelLowering.h               |   1 +
 lib/Target/AMDGPU/R600InstrInfo.cpp                |  18 +---
 lib/Target/AMDGPU/R600Instructions.td              |  38 ++++++--
 lib/Target/AMDGPU/R600RegisterInfo.cpp             |   7 +-
 lib/Target/AMDGPU/R600RegisterInfo.td              |  12 +--
 lib/Target/AMDGPU/r600eliminatesymbolicoperand.cpp |  81 ++++++++++++++++
 lib/Target/AMDGPU/r600eliminatesymbolicoperand.h   |   6 ++
 15 files changed, 333 insertions(+), 79 deletions(-)
 create mode 100644 lib/Target/AMDGPU/r600eliminatesymbolicoperand.cpp
 create mode 100644 lib/Target/AMDGPU/r600eliminatesymbolicoperand.h

diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 40864b0..2263b15 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -23,6 +23,7 @@ class AMDGPUTargetMachine;
 // R600 Passes
 FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
 FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+FunctionPass *createR600EliminateSymbolicOperandPass(TargetMachine &tm);
 
 // SI Passes
 FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 910c2ff..df1a4d6 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -398,5 +398,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(INTERP)
   NODE_NAME_CASE(INTERP_P0)
   NODE_NAME_CASE(EXPORT)
+  NODE_NAME_CASE(CONST_ADDRESS)
   }
 }
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 992dab7..5671f94 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -122,6 +122,7 @@ enum {
   INTERP,
   INTERP_P0,
   EXPORT,
+  CONST_ADDRESS,
   LAST_AMDGPU_ISD_NUMBER
 };
 
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e42fa8a..679a0fe 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -130,6 +130,7 @@ bool AMDGPUPassConfig::addPreEmitPass() {
   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
   if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
     addPass(createR600ExpandSpecialInstrsPass(*TM));
+    addPass(createR600EliminateSymbolicOperandPass(*TM));
     addPass(&FinalizeMachineBundlesID);
   } else {
     addPass(createSILowerLiteralConstantsPass(*TM));
diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
index a765438..f56a81f 100644
--- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
@@ -165,6 +165,28 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
     }
     break;
   }
+  case AMDGPUISD::CONST_ADDRESS: {
+    EVT OpVT = N->getValueType(0);
+    unsigned int NewOpc = AMDGPU::CONST_COPY;
+    SDValue TZero = CurDAG->getTargetConstant(0, MVT::i32);
+    SDValue TOne = CurDAG->getTargetConstant(1, MVT::i32);
+    SDValue Args[12] = {
+        TOne,
+        TZero,
+        TZero,
+        TZero,
+        N->getOperand(0),
+        TZero,
+        TZero,
+        TZero,
+        TZero,
+        TOne,
+        CurDAG->getRegister(AMDGPU::PRED_SEL_OFF, MVT::i32),
+        TZero
+    };
+
+    return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Args, 12);
+  }
   case ISD::ConstantFP:
   case ISD::Constant: {
     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
@@ -257,7 +279,33 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
     break;
   }
   }
-  return SelectCode(N);
+  SDNode *Result = SelectCode(N);
+
+  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+    const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+    if (Result && TII->isALUInstr(Result->getMachineOpcode())) {
+      std::vector<SDValue> Ops;
+      MachineSDNode *PotentialGlue = 0;
+      for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
+          I != E; ++I) {
+        SDValue Value = *I;
+        if (Value.getOpcode() == AMDGPUISD::CONST_ADDRESS) {
+          if (!dyn_cast<ConstantSDNode>(Value.getOperand(1))) {
+            PotentialGlue = CurDAG->getMachineNode(AMDGPU::MOVA_INT, Value.getDebugLoc(), MVT::Glue, Value.getOperand(1));
+          }
+          Ops.push_back(Value.getOperand(0));
+        } else {
+          Ops.push_back(Value);
+        }
+      }
+      if (PotentialGlue)
+        Ops.push_back(SDValue(PotentialGlue, 0));
+      CurDAG->MorphNodeTo(Result, Result->getOpcode(), Result->getVTList(), Ops.data(), Ops.size());
+    }
+  }
+
+  return Result;
 }
 
 bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 018234a..8be6ed4 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -64,8 +64,8 @@ private:
   void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
                     raw_ostream &OS) const;
   void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
-  void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
-                  raw_ostream &OS) const;
+  void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
+                    raw_ostream &OS) const;
   void EmitDst(const MCInst &MI, raw_ostream &OS) const;
   void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
                     raw_ostream &OS) const;
@@ -195,7 +195,6 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
                                      SmallVectorImpl<MCFixup> &Fixups,
                                      raw_ostream &OS) const {
   const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
-  unsigned NumOperands = MI.getNumOperands();
 
   // Emit instruction type
   EmitByte(INSTR_ALU, OS);
@@ -211,19 +210,21 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
     InstWord01 |= ISAOpCode << 1;
   }
 
-  unsigned SrcIdx = 0;
-  for (unsigned int OpIdx = 1; OpIdx < NumOperands; ++OpIdx) {
-    if (MI.getOperand(OpIdx).isImm() || MI.getOperand(OpIdx).isFPImm() ||
-        OpIdx == (unsigned)MCDesc.findFirstPredOperandIdx()) {
-      continue;
-    }
-    EmitSrcISA(MI, OpIdx, InstWord01, OS);
-    SrcIdx++;
-  }
+  unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
+      MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
 
-  // Emit zeros for unused sources
-  for ( ; SrcIdx < 3; SrcIdx++) {
-    EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
+  EmitByte(SrcNum, OS);
+
+  const unsigned SrcOps[3][2] = {
+      {R600Operands::SRC0, R600Operands::SRC0_SEL},
+      {R600Operands::SRC1, R600Operands::SRC1_SEL},
+      {R600Operands::SRC2, R600Operands::SRC2_SEL}
+  };
+
+  for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
+    unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
+    unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
+    EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
   }
 
   Emit(InstWord01, OS);
@@ -294,34 +295,37 @@ void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
 
 }
 
-void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
-                                   uint64_t &Value, raw_ostream &OS) const {
-  const MCOperand &MO = MI.getOperand(OpIdx);
+void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
+                                   unsigned SelOpIdx, raw_ostream &OS) const {
+  const MCOperand &RegMO = MI.getOperand(RegOpIdx);
+  const MCOperand &SelMO = MI.getOperand(SelOpIdx);
+
   union {
     float f;
     uint32_t i;
   } InlineConstant;
   InlineConstant.i = 0;
-  // Emit the source select (2 bytes).  For GPRs, this is the register index.
-  // For other potential instruction operands, (e.g. constant registers) the
-  // value of the source select is defined in the r600isa docs.
-  if (MO.isReg()) {
-    unsigned Reg = MO.getReg();
-    if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
-      EmitByte(1, OS);
-    } else {
-      EmitByte(0, OS);
-    }
+  // Emit source type (1 byte) and source select (4 bytes). For GPRs, type is 0
+  // and select is 0 (GPR index is encoded in the instruction). For constants,
+  // type is 1 and select is the original const select passed from the driver.
+  unsigned Reg = RegMO.getReg();
+  if (Reg == AMDGPU::ALU_CONST) {
+    EmitByte(1, OS);
+    uint32_t Sel = SelMO.getImm();
+    Emit(Sel, OS);
+  } else {
+    EmitByte(0, OS);
+    Emit((uint32_t)0, OS);
+  }
 
-    if (Reg == AMDGPU::ALU_LITERAL_X) {
-      unsigned ImmOpIndex = MI.getNumOperands() - 1;
-      MCOperand ImmOp = MI.getOperand(ImmOpIndex);
-      if (ImmOp.isFPImm()) {
-        InlineConstant.f = ImmOp.getFPImm();
-      } else {
-        assert(ImmOp.isImm());
-        InlineConstant.i = ImmOp.getImm();
-      }
+  if (Reg == AMDGPU::ALU_LITERAL_X) {
+    unsigned ImmOpIndex = MI.getNumOperands() - 1;
+    MCOperand ImmOp = MI.getOperand(ImmOpIndex);
+    if (ImmOp.isFPImm()) {
+      InlineConstant.f = ImmOp.getFPImm();
+    } else {
+      assert(ImmOp.isImm());
+      InlineConstant.i = ImmOp.getImm();
     }
   }
 
diff --git a/lib/Target/AMDGPU/R600Defines.h b/lib/Target/AMDGPU/R600Defines.h
index 7dea8e4..e19eea3 100644
--- a/lib/Target/AMDGPU/R600Defines.h
+++ b/lib/Target/AMDGPU/R600Defines.h
@@ -62,18 +62,33 @@ namespace R600Operands {
     SRC0_NEG,
     SRC0_REL,
     SRC0_ABS,
+    SRC0_SEL,
     SRC1,
     SRC1_NEG,
     SRC1_REL,
     SRC1_ABS,
+    SRC1_SEL,
     SRC2,
     SRC2_NEG,
     SRC2_REL,
+    SRC2_SEL,
     LAST,
     PRED_SEL,
     IMM,
     COUNT
  };
+
+  const static int ALUOpTable[3][R600Operands::COUNT] = {
+//            W        C     S  S  S  S     S  S  S  S     S  S  S
+//            R  O  D  L  S  R  R  R  R  S  R  R  R  R  S  R  R  R  L  P
+//   D  U     I  M  R  A  R  C  C  C  C  R  C  C  C  C  R  C  C  C  A  R  I
+//   S  E  U  T  O  E  M  C  0  0  0  0  C  1  1  1  1  C  2  2  2  S  E  M
+//   T  M  P  E  D  L  P  0  N  R  A  S  1  N  R  A  S  2  N  R  S  T  D  M
+    {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12},
+    {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19},
+    {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17}
+  };
+
 }
 
 #endif // R600DEFINES_H_
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index d09f8c0e..90efbc9 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -16,6 +16,7 @@
 #include "R600Defines.h"
 #include "R600InstrInfo.h"
 #include "R600MachineFunctionInfo.h"
+#include "AMDGPURegisterInfo.h"
 #include "llvm/Argument.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -97,7 +98,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
   setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
   setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
+  setOperationAction(ISD::LOAD, MVT::v4f32, Custom);
   setTargetDAGCombine(ISD::FP_ROUND);
+  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
 
   setSchedulingPreference(Sched::VLIW);
 }
@@ -139,11 +142,13 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
   }
 
   case AMDGPU::R600_LOAD_CONST: {
-    int64_t RegIndex = MI->getOperand(1).getImm();
-    unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
-    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
-                .addOperand(MI->getOperand(0))
-                .addReg(ConstantReg);
+    unsigned ConstSel = MI->getOperand(1).getImm();
+    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
+                                                    AMDGPU::MOV,
+                                                    MI->getOperand(0).getReg(),
+                                                    AMDGPU::ALU_CONST);
+    int SelIdx = TII->getOperandIdx(*NewMI, R600Operands::SRC0_SEL);
+    NewMI->getOperand(SelIdx).setImm(ConstSel);
     break;
   }
 
@@ -417,6 +422,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
   case ISD::SELECT: return LowerSELECT(Op, DAG);
   case ISD::SETCC: return LowerSETCC(Op, DAG);
+  case ISD::LOAD: return LowerLOAD(Op, DAG);
   case ISD::FPOW: return LowerFPOW(Op, DAG);
   case ISD::INTRINSIC_VOID: {
     SDValue Chain = Op.getOperand(0);
@@ -580,6 +586,16 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
   switch (N->getOpcode()) {
   default: return;
   case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+    return;
+  case ISD::LOAD: {
+    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
+    Results.push_back(SDValue(Node, 0));
+    Results.push_back(SDValue(Node, 1));
+    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
+    // function
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
+    return;
+  }
   }
 }
 
@@ -861,6 +877,71 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   return Cond;
 }
 
+/// Walk \p Addr looking for a GlobalAddress combined through ISD::ADD nodes.
+/// On success, returns the global, stores the accumulated constant offset in
+/// \p Offset and the single non-constant addend (if any) in \p BaseReg.
+/// Returns NULL if no global is found or more than one addend is non-constant.
+static
+const GlobalValue *SelectAddr(SDValue Addr, unsigned &Offset, SDValue &BaseReg) {
+  switch (Addr.getOpcode()) {
+  case ISD::GlobalAddress: {
+    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Addr);
+    Offset = G->getOffset();
+    return G->getGlobal();
+  }
+  case ISD::ADD: {
+    // Try each operand as the global-rooted side; the other is the addend.
+    for (unsigned Side = 0; Side < 2; ++Side) {
+      // Pass BaseReg down so an index found in a nested ADD is not dropped.
+      const GlobalValue *Result =
+          SelectAddr(Addr.getOperand(Side), Offset, BaseReg);
+      if (!Result)
+        continue;
+      SDValue Addend = Addr.getOperand(1 - Side);
+      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Addend))
+        Offset += Const->getZExtValue();
+      else if (BaseReg.getNode())
+        return NULL; // A second dynamic index cannot be represented.
+      else
+        BaseReg = Addend;
+      return Result;
+    }
+    return NULL;
+  }
+  default:
+    return NULL;
+  }
+}
+
+/// Custom-lower a load from the constant address space into a BUILD_VECTOR of
+/// four CONST_ADDRESS nodes merged with the incoming chain. Returns an empty
+/// SDValue (callers must check) for other loads or non-global addresses.
+SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
+{
+  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+  if (LoadNode->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
+    return SDValue();
+  DebugLoc DL = Op.getDebugLoc();
+  unsigned Offset = 0;
+  SDValue BaseReg;
+  const GlobalValue *GV = SelectAddr(Op.getOperand(1), Offset, BaseReg);
+  if (!GV)
+    return SDValue();
+
+  // A dynamic index is shifted right by two; otherwise the base is zero.
+  BaseReg = BaseReg.getNode() ?
+      DAG.getNode(ISD::SRL, DL, MVT::i32, BaseReg, DAG.getConstant(2, MVT::i32)) :
+      DAG.getConstant(0, MVT::i32);
+  SDValue Slots[4];
+  for (unsigned i = 0; i < 4; i++) {
+    SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, MVT::f32, Offset / 4 + i);
+    Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::f32, TGA, BaseReg);
+  }
+  SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, Slots, 4);
+  SDValue MergedValues[2] = { Vec, Op.getOperand(0) /* chain */ };
+  return DAG.getMergeValues(MergedValues, 2, DL);
+}
+
 SDValue R600TargetLowering::LowerFPOW(SDValue Op,
     SelectionDAG &DAG) const {
   DebugLoc DL = Op.getDebugLoc();
@@ -918,6 +999,17 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
       }
       break;
     }
+  // Extract_vec (Build_vector) generated by custom lowering
+  // also needs a custom combine.
+  case ISD::EXTRACT_VECTOR_ELT: {
+    SDValue Arg = N->getOperand(0);
+    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
+      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+        unsigned Element = Const->getZExtValue();
+        return Arg->getOperand(Element);
+      }
+    }
+  }
   }
   return SDValue();
 }
diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h
index bdb0a55..70ece84 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/lib/Target/AMDGPU/R600ISelLowering.h
@@ -62,6 +62,7 @@ private:
   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
   
   bool isZero(SDValue Op) const;
 };
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
index a60a180..6c1c50a 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -484,13 +484,15 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB
      .addReg(Src0Reg)  // $src0
      .addImm(0)        // $src0_neg
      .addImm(0)        // $src0_rel
-     .addImm(0);       // $src0_abs
+     .addImm(0)        // $src0_abs
+     .addImm(0);       // $src0_sel
 
   if (Src1Reg) {
     MIB.addReg(Src1Reg) // $src1
        .addImm(0)       // $src1_neg
        .addImm(0)       // $src1_rel
-       .addImm(0);       // $src1_abs
+       .addImm(0)       // $src1_abs
+       .addImm(0);      // $src1_sel
   }
 
   //XXX: The r600g finalizer expects this to be 1, once we've moved the
@@ -519,16 +521,6 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
 
 int R600InstrInfo::getOperandIdx(unsigned Opcode,
                                  R600Operands::Ops Op) const {
-  const static int OpTable[3][R600Operands::COUNT] = {
-//            W        C     S  S  S     S  S  S     S  S
-//            R  O  D  L  S  R  R  R  S  R  R  R  S  R  R  L  P
-//   D  U     I  M  R  A  R  C  C  C  C  C  C  C  R  C  C  A  R  I
-//   S  E  U  T  O  E  M  C  0  0  0  C  1  1  1  C  2  2  S  E  M
-//   T  M  P  E  D  L  P  0  N  R  A  1  N  R  A  2  N  R  T  D  M
-    {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8,-1,-1,-1,-1,-1,-1,-1, 9,10,11},
-    {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17},
-    {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14}
-  };
   unsigned TargetFlags = get(Opcode).TSFlags;
   unsigned OpTableIdx;
 
@@ -554,7 +546,7 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode,
     OpTableIdx = 2;
   }
 
-  return OpTable[OpTableIdx][Op];
+  return R600Operands::ALUOpTable[OpTableIdx][Op];
 }
 
 void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index 66c9249..845784b 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -69,6 +69,8 @@ class InstFlag<string PM = "printOperand", int Default = 0>
   let PrintMethod = PM;
 }
 
+def SEL : OperandWithDefaultOps <i32, (ops (i32 0))>;
+
 def LITERAL : InstFlag<"printLiteral">;
 
 def WRITE : InstFlag <"printWrite", 1>;
@@ -213,7 +215,7 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
     InstR600 <0,
               (outs R600_Reg32:$dst),
               (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
-                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
+                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
                    LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
               !strconcat(opName,
                    "$clamp $dst$write$dst_rel$omod, "
@@ -253,8 +255,8 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
           (outs R600_Reg32:$dst),
           (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
                OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
-               R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
-               R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs,
+               R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
+               R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
                LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
           !strconcat(opName,
                 "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
@@ -290,9 +292,9 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
   InstR600 <0,
           (outs R600_Reg32:$dst),
           (ins REL:$dst_rel, CLAMP:$clamp,
-               R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel,
-               R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel,
-               R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel,
+               R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
+               R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
+               R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
                LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
           !strconcat(opName, "$clamp $dst$dst_rel, "
                              "$src0_neg$src0$src0_rel, "
@@ -415,7 +417,7 @@ def isR600toCayman : Predicate<
                      "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
 
 //===----------------------------------------------------------------------===//
-// Interpolation Instructions
+// R600 SDNodes
 //===----------------------------------------------------------------------===//
 
 def INTERP: SDNode<"AMDGPUISD::INTERP",
@@ -426,6 +428,18 @@ def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
   SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]>
   >;
 
+def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
+  SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>,
+  [SDNPMayLoad]
+>;
+
+def MOVA_INT : InstR600 <0xCC, (outs), (ins R600_Reg32:$src0), "MOVA_INT", [],
+AnyALU>;
+
+//===----------------------------------------------------------------------===//
+// Interpolation Instructions
+//===----------------------------------------------------------------------===//
+
 let usesCustomInserter = 1 in {
 def input_perspective :  AMDGPUShaderInst <
   (outs R600_Reg128:$dst),
@@ -1567,6 +1581,16 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
       "RETURN", [(IL_retflag)]>;
 }
 
+
+//===----------------------------------------------------------------------===//
+// Constant Buffer Addressing Support
+//===----------------------------------------------------------------------===//
+
+def CONST_COPY : R600_1OP <0x19, "CONST_COPY", []>
+{
+  let mayLoad = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // ISel Patterns
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.cpp b/lib/Target/AMDGPU/R600RegisterInfo.cpp
index a39f83d..397fbaf 100644
--- a/lib/Target/AMDGPU/R600RegisterInfo.cpp
+++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp
@@ -15,6 +15,7 @@
 #include "R600RegisterInfo.h"
 #include "AMDGPUTargetMachine.h"
 #include "R600Defines.h"
+#include "R600InstrInfo.h"
 #include "R600MachineFunctionInfo.h"
 
 using namespace llvm;
@@ -38,16 +39,12 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(AMDGPU::NEG_ONE);
   Reserved.set(AMDGPU::PV_X);
   Reserved.set(AMDGPU::ALU_LITERAL_X);
+  Reserved.set(AMDGPU::ALU_CONST);
   Reserved.set(AMDGPU::PREDICATE_BIT);
   Reserved.set(AMDGPU::PRED_SEL_OFF);
   Reserved.set(AMDGPU::PRED_SEL_ZERO);
   Reserved.set(AMDGPU::PRED_SEL_ONE);
 
-  for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
-                        E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
-    Reserved.set(*I);
-  }
-
   for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
                                     E = MFI->ReservedRegs.end(); I != E; ++I) {
     Reserved.set(*I);
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td b/lib/Target/AMDGPU/R600RegisterInfo.td
index d3d6d25..10fe858 100644
--- a/lib/Target/AMDGPU/R600RegisterInfo.td
+++ b/lib/Target/AMDGPU/R600RegisterInfo.td
@@ -28,9 +28,6 @@ foreach Index = 0-127 in {
     // 32-bit Temporary Registers
     def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
 
-    // 32-bit Constant Registers (There are more than 128, this the number
-    // that is currently supported.
-    def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>;
   }
   // 128-bit Temporary Registers
   def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
@@ -46,7 +43,6 @@ foreach Index = 448-464 in {
   def ArrayBase#Index :  R600Reg<"ARRAY_BASE", Index>;
 }
 
-
 // Special Registers
 
 def ZERO : R600Reg<"0.0", 248>;
@@ -61,16 +57,11 @@ def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
 def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
 def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
 def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
+def ALU_CONST : R600Reg<"Const", 0>;
 
 def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
                           (add (sequence "ArrayBase%u", 448, 464))>;
 
-def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
-                          (add (interleave
-                                  (interleave (sequence "C%u_X", 0, 127),
-                                              (sequence "C%u_Z", 0, 127)),
-                                  (interleave (sequence "C%u_Y", 0, 127),
-                                              (sequence "C%u_W", 0, 127))))>;
 
 def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
                                    (add (sequence "T%u_X", 0, 127))>;
@@ -91,7 +82,6 @@ def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
 
 def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
     R600_TReg32,
-    R600_CReg32,
     R600_ArrayBase,
     ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
 
diff --git a/lib/Target/AMDGPU/r600eliminatesymbolicoperand.cpp b/lib/Target/AMDGPU/r600eliminatesymbolicoperand.cpp
new file mode 100644
index 0000000..ffff73b
--- /dev/null
+++ b/lib/Target/AMDGPU/r600eliminatesymbolicoperand.cpp
@@ -0,0 +1,81 @@
+//===-- R600eliminatesymbolicoperand.cpp - Eliminate Symbolic Operands-----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Before this pass runs, the backend can manipulate symbolic operands such as
+/// a GlobalAddress for data read from constant buffers or a FrameIndex for a
+/// stack-allocated array. This pass replaces these operands with values that
+/// can be passed to the MCInstrEmitter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "R600InstrInfo.h"
+#include <iostream>
+namespace llvm {
+/// Replaces global-address operands with ALU_CONST plus a srcN_sel immediate.
+class R600EliminateSymbolicOperand : public MachineFunctionPass {
+private:
+  static char ID;
+  const R600InstrInfo *TII;
+public:
+  R600EliminateSymbolicOperand(TargetMachine &tm);
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  const char *getPassName() const { return "R600 Eliminate Symbolic Operand"; }
+};
+
+char R600EliminateSymbolicOperand::ID = 0;
+
+/// Caches the target's instruction info, downcast to R600InstrInfo.
+R600EliminateSymbolicOperand::R600EliminateSymbolicOperand(TargetMachine &tm) :
+    MachineFunctionPass(ID),
+    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo()))
+{
+}
+
+/// Rewrite global-address operands to ALU_CONST, storing offset + 2048 in the
+/// matching srcN_sel operand. Returns true if any instruction was modified.
+bool R600EliminateSymbolicOperand::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+  for (MachineFunction::iterator BB = MF.begin(); BB != MF.end(); ++BB) {
+    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+      MachineInstr &MI = *I;
+      for (int i = 0, e = MI.getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI.getOperand(i);
+        if (!MO.isGlobal()) continue;
+        // -1 keeps builds without asserts from using an uninitialized index.
+        int SelIdx = -1;
+        if (i == TII->getOperandIdx(MI, R600Operands::SRC0))
+          SelIdx = TII->getOperandIdx(MI, R600Operands::SRC0_SEL);
+        else if (i == TII->getOperandIdx(MI, R600Operands::SRC1))
+          SelIdx = TII->getOperandIdx(MI, R600Operands::SRC1_SEL);
+        else if (i == TII->getOperandIdx(MI, R600Operands::SRC2))
+          SelIdx = TII->getOperandIdx(MI, R600Operands::SRC2_SEL);
+        assert(SelIdx >= 0 && "Wrong operand location for Global Address");
+        if (SelIdx < 0) continue;
+        unsigned ConstSel = MO.getOffset() + 2048;
+        MI.getOperand(SelIdx).setImm(ConstSel);
+        MO.ChangeToRegister(AMDGPU::ALU_CONST, false);
+        Changed = true;
+      }
+    }
+  }
+  return Changed;
+}
+/// Returns a newly allocated R600EliminateSymbolicOperand pass for \p tm.
+FunctionPass *createR600EliminateSymbolicOperandPass(TargetMachine &tm) {
+  return new R600EliminateSymbolicOperand(tm);
+}
+
+}
+
+
diff --git a/lib/Target/AMDGPU/r600eliminatesymbolicoperand.h b/lib/Target/AMDGPU/r600eliminatesymbolicoperand.h
new file mode 100644
index 0000000..509c5bc
--- /dev/null
+++ b/lib/Target/AMDGPU/r600eliminatesymbolicoperand.h
@@ -0,0 +1,6 @@
+#ifndef R600ELIMINATESYMBOLICOPERAND_H
+#define R600ELIMINATESYMBOLICOPERAND_H
+
+
+
+#endif // R600ELIMINATESYMBOLICOPERAND_H
-- 
1.8.0.2