[Mesa-dev] [PATCH 4/5] R600: use pointers for constants
Vincent Lejeune
vljn at ovi.com
Tue Jan 8 13:42:28 PST 2013
---
lib/Target/R600/AMDGPU.h | 1 +
lib/Target/R600/AMDGPUTargetMachine.cpp | 1 +
lib/Target/R600/AMDIL.h | 19 +++-
lib/Target/R600/AMDILISelDAGToDAG.cpp | 104 +++++----------------
lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 3 +-
lib/Target/R600/R600ISelLowering.cpp | 103 ++++++++++++++++++++
lib/Target/R600/R600ISelLowering.h | 1 +
lib/Target/R600/R600Instructions.td | 74 +++++++++++++++
lib/Target/R600/R600LowerConstCopy.cpp | 74 +++++++++++++++
9 files changed, 296 insertions(+), 84 deletions(-)
create mode 100644 lib/Target/R600/R600LowerConstCopy.cpp
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 0f5125d..22351bf 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -23,6 +23,7 @@ class AMDGPUTargetMachine;
// R600 Passes
FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+FunctionPass *createR600LowerConstCopy(TargetMachine &tm);
// SI Passes
FunctionPass *createSIAnnotateControlFlowPass();
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index d09dc2e..ad0d434 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -131,6 +131,7 @@ bool AMDGPUPassConfig::addPreEmitPass() {
addPass(createAMDGPUCFGPreparationPass(*TM));
addPass(createAMDGPUCFGStructurizerPass(*TM));
addPass(createR600ExpandSpecialInstrsPass(*TM));
+ addPass(createR600LowerConstCopy(*TM));
addPass(&FinalizeMachineBundlesID);
} else {
addPass(createSILowerLiteralConstantsPass(*TM));
diff --git a/lib/Target/R600/AMDIL.h b/lib/Target/R600/AMDIL.h
index 4e577dc..dee51bc 100644
--- a/lib/Target/R600/AMDIL.h
+++ b/lib/Target/R600/AMDIL.h
@@ -90,14 +90,29 @@ namespace AMDGPUAS {
enum AddressSpaces {
PRIVATE_ADDRESS = 0, ///< Address space for private memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
- CONSTANT_ADDRESS = 2, ///< Address space for constant memory.
+ CONSTANT_ADDRESS = 2, ///< Address space for constant memory (aka CONSTANT_BUFFER_0)
LOCAL_ADDRESS = 3, ///< Address space for local memory.
REGION_ADDRESS = 4, ///< Address space for region memory.
ADDRESS_NONE = 5, ///< Address space for unknown memory.
PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0)
PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1)
USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
- LAST_ADDRESS = 9
+ CONSTANT_BUFFER_1 = 9,
+ CONSTANT_BUFFER_2 = 10,
+ CONSTANT_BUFFER_3 = 11,
+ CONSTANT_BUFFER_4 = 12,
+ CONSTANT_BUFFER_5 = 13,
+ CONSTANT_BUFFER_6 = 14,
+ CONSTANT_BUFFER_7 = 15,
+ CONSTANT_BUFFER_8 = 16,
+ CONSTANT_BUFFER_9 = 17,
+ CONSTANT_BUFFER_10 = 18,
+ CONSTANT_BUFFER_11 = 19,
+ CONSTANT_BUFFER_12 = 20,
+ CONSTANT_BUFFER_13 = 21,
+ CONSTANT_BUFFER_14 = 22,
+ CONSTANT_BUFFER_15 = 23,
+ LAST_ADDRESS = 24
};
} // namespace AMDGPUAS
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index 725a5e4..8fbf153 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include <list>
#include <queue>
@@ -67,6 +68,9 @@ private:
static bool isLocalLoad(const LoadSDNode *N);
static bool isRegionLoad(const LoadSDNode *N);
+ bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
+ bool SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue &BaseReg, SDValue& Offset);
bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
@@ -258,87 +262,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
break;
}
-
- case ISD::INTRINSIC_WO_CHAIN: {
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
- if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
- unsigned IntrinsicID = N->getConstantOperandVal(0);
- switch (IntrinsicID) {
- case AMDGPUIntrinsic::AMDGPU_load_const: {
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo*>(CurDAG->getTarget().getInstrInfo());
- int CSel = N->getConstantOperandVal(1);
- std::vector<SDValue> Ops;
-
- // We'll try to fold the const in the alu insts with native operands
- for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
- Use != SDNode::use_end(); Use = Next) {
- Next = llvm::next(Use);
-
- if (!Use->isMachineOpcode())
- continue;
-
- unsigned Opcode = Use->getMachineOpcode();
-
- const MCInstrDesc & MCDesc = TII->get(Opcode);
- if (HAS_NATIVE_OPERANDS(MCDesc.TSFlags)) {
-
- unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
- MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
-
- const unsigned SrcOps[3][2] = {
- {R600Operands::SRC0, R600Operands::SRC0_SEL},
- {R600Operands::SRC1, R600Operands::SRC1_SEL},
- {R600Operands::SRC2, R600Operands::SRC2_SEL}
- };
-
- Ops.clear();
- for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
- Ops.push_back(Use->getOperand(i));
- }
-
- int RegOpIdx = Use.getOperandNo();
- unsigned SrcIdx;
-
- for (SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
- if (R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]] ==
- RegOpIdx + 1)
- break;
- }
-
- assert(SrcIdx < SrcNum && "ALU const folding: invalid operand");
-
- int SelOpIdx =
- R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]] - 1;
-
- Ops[RegOpIdx] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
- Ops[SelOpIdx] = CurDAG->getTargetConstant(CSel, MVT::i32);
-
- CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
- }
- }
-
- Ops.clear();
- Ops.push_back(CurDAG->getTargetConstant(1, MVT::i32));
- Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32));
- Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32));
- Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32));
- Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
- Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32));
- Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32));
- Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32));
- Ops.push_back(CurDAG->getTargetConstant(CSel, MVT::i32));
- Ops.push_back(CurDAG->getTargetConstant(1, MVT::i32));
- Ops.push_back(CurDAG->getRegister(AMDGPU::PRED_SEL_OFF, MVT::f32));
- Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32));
-
- return CurDAG->SelectNodeTo(N, AMDGPU::MOV, MVT::f32, Ops.data(),
- Ops.size());
-
- }
- }
- }
- }
}
return SelectCode(N);
}
@@ -487,6 +410,25 @@ const char *AMDGPUDAGToDAGISel::getPassName() const {
///==== AMDGPU Functions ====///
+bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+ SDValue& IntPtr) {
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
+ IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue& BaseReg, SDValue &Offset) {
+ if (!dyn_cast<ConstantSDNode>(Addr)) {
+ BaseReg = Addr;
+ Offset = CurDAG->getIntPtrConstant(0, true);
+ return true;
+ }
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
SDValue& Offset) {
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index e357598..e061b18 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -164,7 +164,8 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case AMDGPU::VTX_READ_PARAM_32_eg:
case AMDGPU::VTX_READ_GLOBAL_8_eg:
case AMDGPU::VTX_READ_GLOBAL_32_eg:
- case AMDGPU::VTX_READ_GLOBAL_128_eg: {
+ case AMDGPU::VTX_READ_GLOBAL_128_eg:
+ case AMDGPU::TEX_VTX_CONSTBUF: {
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index cd6170e..6a3c57d 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -74,6 +74,10 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
setTargetDAGCombine(ISD::FP_ROUND);
setSchedulingPreference(Sched::VLIW);
@@ -355,6 +359,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::FPOW: return LowerFPOW(Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
@@ -518,6 +523,16 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default: return;
case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+ return;
+ case ISD::LOAD: {
+ SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
+ Results.push_back(SDValue(Node, 0));
+ Results.push_back(SDValue(Node, 1));
+ // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
+ // function
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
+ return;
+ }
}
}
@@ -823,6 +838,94 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+// return (512 + (kc_bank << 12)
+static int
+ConstantAddressBlock(unsigned AddressSpace) {
+ switch (AddressSpace) {
+ case AMDGPUAS::CONSTANT_ADDRESS:
+ return 512;
+ case AMDGPUAS::CONSTANT_BUFFER_1:
+ return 512 + 4096;
+ case AMDGPUAS::CONSTANT_BUFFER_2:
+ return 512 + 4096 * 2;
+ case AMDGPUAS::CONSTANT_BUFFER_3:
+ return 512 + 4096 * 3;
+ case AMDGPUAS::CONSTANT_BUFFER_4:
+ return 512 + 4096 * 4;
+ case AMDGPUAS::CONSTANT_BUFFER_5:
+ return 512 + 4096 * 5;
+ case AMDGPUAS::CONSTANT_BUFFER_6:
+ return 512 + 4096 * 6;
+ case AMDGPUAS::CONSTANT_BUFFER_7:
+ return 512 + 4096 * 7;
+ case AMDGPUAS::CONSTANT_BUFFER_8:
+ return 512 + 4096 * 8;
+ case AMDGPUAS::CONSTANT_BUFFER_9:
+ return 512 + 4096 * 9;
+ case AMDGPUAS::CONSTANT_BUFFER_10:
+ return 512 + 4096 * 10;
+ case AMDGPUAS::CONSTANT_BUFFER_11:
+ return 512 + 4096 * 11;
+ case AMDGPUAS::CONSTANT_BUFFER_12:
+ return 512 + 4096 * 12;
+ case AMDGPUAS::CONSTANT_BUFFER_13:
+ return 512 + 4096 * 13;
+ case AMDGPUAS::CONSTANT_BUFFER_14:
+ return 512 + 4096 * 14;
+ case AMDGPUAS::CONSTANT_BUFFER_15:
+ return 512 + 4096 * 15;
+ default:
+ return -1;
+ }
+}
+
+SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Ptr = Op.getOperand(1);
+ SDValue LoweredLoad;
+
+ int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
+ if (ConstantBlock > -1) {
+ SDValue Result;
+ if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
+ dyn_cast<Constant>(LoadNode->getSrcValue())) {
+ SDValue Slots[4];
+ for (unsigned i = 0; i < 4; i++) {
+ // We want Const position encoded with the following formula :
+ // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
+ // const_index is Ptr computed by llvm using an alignment of 16.
+ // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
+ // then div by 4 at the ISel step
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
+ Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::f32, NewPtr);
+ }
+ Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, Slots, 4);
+ } else {
+ // Ptr is GA + Reg : it cant be folded, keeps it as a v4f32 load
+ Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4f32,
+ DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32))
+ );
+ }
+
+ if (!VT.isVector()) {
+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Result,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ SDValue MergedValues[2] = {
+ Result,
+ Chain
+ };
+ return DAG.getMergeValues(MergedValues, 2, DL);
+ }
+
+ return SDValue();
+}
SDValue R600TargetLowering::LowerFPOW(SDValue Op,
SelectionDAG &DAG) const {
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index 2b954da..c141d50 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -63,6 +63,7 @@ private:
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
bool isZero(SDValue Op) const;
};
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 41afb06..372ed6f 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -94,6 +94,8 @@ def LAST : InstFlag<"printLast", 1>;
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
+def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
+def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
class R600ALU_Word0 {
field bits<32> Word0;
@@ -1572,6 +1574,78 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
"RETURN", [(IL_retflag)]>;
}
+
+//===----------------------------------------------------------------------===//
+// Constant Buffer Addressing Support
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
+def CONST_COPY : Instruction {
+ let OutOperandList = (outs R600_Reg32:$dst);
+ let InOperandList = (ins i32imm:$src);
+ let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
+ let AsmString = "CONST_COPY";
+ let neverHasSideEffects = 1;
+ let isAsCheapAsAMove = 1;
+ let Itinerary = NullALU;
+}
+} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
+
+def TEX_VTX_CONSTBUF :
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr",
+ [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>,
+ VTX_WORD1_GPR, VTX_WORD0 {
+
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let BUFFER_ID = 0;
+ let SRC_REL = 0;
+ let SRC_SEL_X = 0;
+ let DST_REL = 0;
+ let USE_CONST_FIELDS = 0;
+ let NUM_FORMAT_ALL = 2;
+ let FORMAT_COMP_ALL = 1;
+ let SRF_MODE_ALL = 1;
+ let MEGA_FETCH_COUNT = 16;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 35;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits<16> OFFSET;
+// bits<2> ENDIAN_SWAP = 0;
+// bits<1> CONST_BUF_NO_STRIDE = 0;
+// bits<1> MEGA_FETCH = 0;
+// bits<1> ALT_CONST = 0;
+// bits<2> BUFFER_INDEX_MODE = 0;
+
+
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82} = CONST_BUF_NO_STRIDE;
+// Inst{83} = MEGA_FETCH;
+// Inst{84} = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-86} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
+
//===--------------------------------------------------------------------===//
// Instructions support
//===--------------------------------------------------------------------===//
diff --git a/lib/Target/R600/R600LowerConstCopy.cpp b/lib/Target/R600/R600LowerConstCopy.cpp
new file mode 100644
index 0000000..d14ae20
--- /dev/null
+++ b/lib/Target/R600/R600LowerConstCopy.cpp
@@ -0,0 +1,74 @@
+//===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to MOV---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass is intended to handle remaining ConstCopy pseudo MachineInstr.
+/// ISel will fold each Const Buffer read inside scalar ALU. However it cannot
+/// fold them inside vector instruction, like DOT4 or Cube ; ISel emits
+/// ConstCopy instead. This pass (executed after ExpandingSpecialInstr) will try
+/// to fold them if possible or replace them by MOV otherwise.
+/// TODO : Implement the folding part, using Copy Propagation algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "R600InstrInfo.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+class R600LowerConstCopy : public MachineFunctionPass {
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+public:
+ R600LowerConstCopy(TargetMachine &tm);
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "R600 Eliminate Symbolic Operand"; }
+};
+
+char R600LowerConstCopy::ID = 0;
+
+
+R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm) :
+ MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo()))
+{
+}
+
+bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) {
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E;) {
+ MachineInstr &MI = *I;
+ I = llvm::next(I);
+ if (MI.getOpcode() != AMDGPU::CONST_COPY)
+ continue;
+ MachineInstr *NewMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV,
+ MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
+ NewMI->getOperand(9).setImm(MI.getOperand(1).getImm());
+ MI.eraseFromParent();
+ }
+ }
+ return false;
+}
+
+FunctionPass *createR600LowerConstCopy(TargetMachine &tm) {
+ return new R600LowerConstCopy(tm);
+}
+
+}
+
+
--
1.8.0.1
More information about the mesa-dev
mailing list