[Mesa-dev] [PATCH 3/4] R600: rework handling of the constants
Tom Stellard
tom at stellard.net
Thu Jan 3 07:13:07 PST 2013
On Wed, Dec 26, 2012 at 05:37:20PM +0100, Vincent Lejeune wrote:
> From: Vadim Girlin <vadimgirlin at gmail.com>
>
> Remove Cxxx registers, add new special register - "ALU_CONST" and new
> operand for each alu src - "sel". ALU_CONST is used to designate that the
> new operand contains the value to override src.sel, src.kc_bank, src.chan
> for constants in the driver.
>
> v2[Vincent Lejeune]:
> -Parse load const address space pointer semantic
Just one coding style mistake I saw, otherwise:
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> ---
> lib/Target/AMDGPU/AMDGPU.h | 1 +
> lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 +
> lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp | 79 +++++++++++++
> .../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 81 +++++++-------
> lib/Target/AMDGPU/R600Defines.h | 15 +++
> lib/Target/AMDGPU/R600EliminateSymbolicOperand.cpp | 122 +++++++++++++++++++++
> lib/Target/AMDGPU/R600ISelLowering.cpp | 107 ++++++++++++++++--
> lib/Target/AMDGPU/R600ISelLowering.h | 1 +
> lib/Target/AMDGPU/R600InstrInfo.cpp | 18 +--
> lib/Target/AMDGPU/R600Instructions.td | 84 +++++++++++++-
> lib/Target/AMDGPU/R600RegisterInfo.cpp | 7 +-
> lib/Target/AMDGPU/R600RegisterInfo.td | 12 +-
> 12 files changed, 448 insertions(+), 80 deletions(-)
> create mode 100644 lib/Target/AMDGPU/R600EliminateSymbolicOperand.cpp
>
> diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
> index 40864b0..2263b15 100644
> --- a/lib/Target/AMDGPU/AMDGPU.h
> +++ b/lib/Target/AMDGPU/AMDGPU.h
> @@ -23,6 +23,7 @@ class AMDGPUTargetMachine;
> // R600 Passes
> FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
> FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
> +FunctionPass *createR600EliminateSymbolicOperandPass(TargetMachine &tm);
>
> // SI Passes
> FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
> diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
> index e42fa8a..679a0fe 100644
> --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
> @@ -130,6 +130,7 @@ bool AMDGPUPassConfig::addPreEmitPass() {
> const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
> if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
> addPass(createR600ExpandSpecialInstrsPass(*TM));
> + addPass(createR600EliminateSymbolicOperandPass(*TM));
> addPass(&FinalizeMachineBundlesID);
> } else {
> addPass(createSILowerLiteralConstantsPass(*TM));
> diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
> index a765438..6f156df 100644
> --- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
> +++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
> @@ -20,6 +20,7 @@
> #include "llvm/CodeGen/PseudoSourceValue.h"
> #include "llvm/CodeGen/SelectionDAGISel.h"
> #include "llvm/Support/Compiler.h"
> +#include "llvm/CodeGen/SelectionDAG.h"
> #include <list>
> #include <queue>
>
> @@ -66,6 +67,9 @@ private:
> static bool isLocalLoad(const LoadSDNode *N);
> static bool isRegionLoad(const LoadSDNode *N);
>
> + bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& TGA);
> + bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue& Offset,
> + SDValue &TGA);
> bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
> bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
> bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
> @@ -400,6 +404,81 @@ const char *AMDGPUDAGToDAGISel::getPassName() const {
>
> ///==== AMDGPU Functions ====///
>
> +class R600Address {
> +public:
> + const GlobalValue *Global;
> + unsigned Offset;
> + SDValue BaseReg;
> +
> + R600Address(const GlobalValue *GV, unsigned Off, const SDValue &BR) :
> + Global(GV), Offset(Off), BaseReg(BR)
> + { }
> +
> +};
> +
> +static
> +const R600Address SelectAddr(SDValue Addr, SelectionDAG &DAG) {
> + switch (Addr.getOpcode()) {
> + case ISD::Constant: {
> + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Addr);
> + return R600Address(0, Const->getZExtValue(), SDValue());
> + }
> + case ISD::GlobalAddress: {
> + GlobalAddressSDNode * G = dyn_cast<GlobalAddressSDNode>(Addr);
> + return R600Address(G->getGlobal(), 0, SDValue());
> + }
> + case ISD::OR:
> + if (!DAG.isBaseWithConstantOffset(Addr))
> + break;
> + // Else OR and ADD code is the same
> + case ISD::ADD: {
> + const R600Address &LHSAddr = SelectAddr(Addr.getOperand(0), DAG);
> + const R600Address &RHSAddr = SelectAddr(Addr.getOperand(1), DAG);
> +
> + if (LHSAddr.BaseReg.getNode() && RHSAddr.BaseReg.getNode()) {
> + break;
> + }
> +
> + const GlobalValue *NewGV = LHSAddr.Global?LHSAddr.Global:RHSAddr.Global;
> + if (LHSAddr.BaseReg.getNode()) {
> + return R600Address(NewGV, LHSAddr.Offset + RHSAddr.Offset,
> + LHSAddr.BaseReg);
> + } else if (RHSAddr.BaseReg.getNode()) {
> + return R600Address(NewGV, LHSAddr.Offset + RHSAddr.Offset,
> + RHSAddr.BaseReg);
> + } else {
> + return R600Address(NewGV, LHSAddr.Offset + RHSAddr.Offset, SDValue());
> + }
> + }
> + default:
> + break;
> + }
> + return R600Address(0, 0, Addr);
> +}
> +
> +bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
> + SDValue& TGA) {
> + const R600Address R600Addr = SelectAddr(Addr, *CurDAG);
> + if (!R600Addr.BaseReg.getNode()) {
> + TGA = CurDAG->getTargetGlobalAddress(R600Addr.Global, Addr.getDebugLoc(),
> + Addr.getValueType(), R600Addr.Offset);
> + return true;
> + }
> + return false;
> +}
> +
> +bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
> + SDValue& Offset, SDValue &TGA) {
> + const R600Address R600Addr = SelectAddr(Addr, *CurDAG);
> + if (R600Addr.BaseReg.getNode()) {
> + TGA = CurDAG->getTargetGlobalAddress(R600Addr.Global, Addr.getDebugLoc(),
> + Addr.getValueType(), R600Addr.Offset);
> + Offset = R600Addr.BaseReg;
> + return true;
> + }
> + return false;
> +}
> +
> bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
> SDValue& Offset) {
> if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
> diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
> index 018234a..4875dac 100644
> --- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
> +++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
> @@ -64,8 +64,8 @@ private:
> void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
> raw_ostream &OS) const;
> void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
> - void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
> - raw_ostream &OS) const;
> + void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
> + raw_ostream &OS) const;
> void EmitDst(const MCInst &MI, raw_ostream &OS) const;
> void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
> raw_ostream &OS) const;
> @@ -165,7 +165,8 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
> case AMDGPU::VTX_READ_GLOBAL_i32_eg:
> case AMDGPU::VTX_READ_GLOBAL_f32_eg:
> case AMDGPU::VTX_READ_GLOBAL_v4i32_eg:
> - case AMDGPU::VTX_READ_GLOBAL_v4f32_eg: {
> + case AMDGPU::VTX_READ_GLOBAL_v4f32_eg:
> + case AMDGPU::TEX_VTX_CONSTBUF: {
> uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
> uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
>
> @@ -195,7 +196,6 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
> SmallVectorImpl<MCFixup> &Fixups,
> raw_ostream &OS) const {
> const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
> - unsigned NumOperands = MI.getNumOperands();
>
> // Emit instruction type
> EmitByte(INSTR_ALU, OS);
> @@ -211,19 +211,21 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
> InstWord01 |= ISAOpCode << 1;
> }
>
> - unsigned SrcIdx = 0;
> - for (unsigned int OpIdx = 1; OpIdx < NumOperands; ++OpIdx) {
> - if (MI.getOperand(OpIdx).isImm() || MI.getOperand(OpIdx).isFPImm() ||
> - OpIdx == (unsigned)MCDesc.findFirstPredOperandIdx()) {
> - continue;
> - }
> - EmitSrcISA(MI, OpIdx, InstWord01, OS);
> - SrcIdx++;
> - }
> + unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
> + MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
>
> - // Emit zeros for unused sources
> - for ( ; SrcIdx < 3; SrcIdx++) {
> - EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
> + EmitByte(SrcNum, OS);
> +
> + const unsigned SrcOps[3][2] = {
> + {R600Operands::SRC0, R600Operands::SRC0_SEL},
> + {R600Operands::SRC1, R600Operands::SRC1_SEL},
> + {R600Operands::SRC2, R600Operands::SRC2_SEL}
> + };
> +
> + for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
> + unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
> + unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
> + EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
> }
>
> Emit(InstWord01, OS);
> @@ -294,34 +296,37 @@ void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
>
> }
>
> -void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
> - uint64_t &Value, raw_ostream &OS) const {
> - const MCOperand &MO = MI.getOperand(OpIdx);
> +void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
> + unsigned SelOpIdx, raw_ostream &OS) const {
> + const MCOperand &RegMO = MI.getOperand(RegOpIdx);
> + const MCOperand &SelMO = MI.getOperand(SelOpIdx);
> +
> union {
> float f;
> uint32_t i;
> } InlineConstant;
> InlineConstant.i = 0;
> - // Emit the source select (2 bytes). For GPRs, this is the register index.
> - // For other potential instruction operands, (e.g. constant registers) the
> - // value of the source select is defined in the r600isa docs.
> - if (MO.isReg()) {
> - unsigned Reg = MO.getReg();
> - if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
> - EmitByte(1, OS);
> - } else {
> - EmitByte(0, OS);
> - }
> + // Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0
> + // and select is 0 (GPR index is encoded in the instr encoding. For constants
> + // type is 1 and select is the original const select passed from the driver.
> + unsigned Reg = RegMO.getReg();
> + if (Reg == AMDGPU::ALU_CONST) {
> + EmitByte(1, OS);
> + uint32_t Sel = SelMO.getImm();
> + Emit(Sel, OS);
> + } else {
> + EmitByte(0, OS);
> + Emit((uint32_t)0, OS);
> + }
>
> - if (Reg == AMDGPU::ALU_LITERAL_X) {
> - unsigned ImmOpIndex = MI.getNumOperands() - 1;
> - MCOperand ImmOp = MI.getOperand(ImmOpIndex);
> - if (ImmOp.isFPImm()) {
> - InlineConstant.f = ImmOp.getFPImm();
> - } else {
> - assert(ImmOp.isImm());
> - InlineConstant.i = ImmOp.getImm();
> - }
> + if (Reg == AMDGPU::ALU_LITERAL_X) {
> + unsigned ImmOpIndex = MI.getNumOperands() - 1;
> + MCOperand ImmOp = MI.getOperand(ImmOpIndex);
> + if (ImmOp.isFPImm()) {
> + InlineConstant.f = ImmOp.getFPImm();
> + } else {
> + assert(ImmOp.isImm());
> + InlineConstant.i = ImmOp.getImm();
> }
> }
>
> diff --git a/lib/Target/AMDGPU/R600Defines.h b/lib/Target/AMDGPU/R600Defines.h
> index 7dea8e4..e19eea3 100644
> --- a/lib/Target/AMDGPU/R600Defines.h
> +++ b/lib/Target/AMDGPU/R600Defines.h
> @@ -62,18 +62,33 @@ namespace R600Operands {
> SRC0_NEG,
> SRC0_REL,
> SRC0_ABS,
> + SRC0_SEL,
> SRC1,
> SRC1_NEG,
> SRC1_REL,
> SRC1_ABS,
> + SRC1_SEL,
> SRC2,
> SRC2_NEG,
> SRC2_REL,
> + SRC2_SEL,
> LAST,
> PRED_SEL,
> IMM,
> COUNT
> };
> +
> + const static int ALUOpTable[3][R600Operands::COUNT] = {
> +// W C S S S S S S S S S S S
> +// R O D L S R R R R S R R R R S R R R L P
> +// D U I M R A R C C C C R C C C C R C C C A R I
> +// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M
> +// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M
> + {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12},
> + {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19},
> + {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17}
> + };
> +
> }
>
> #endif // R600DEFINES_H_
> diff --git a/lib/Target/AMDGPU/R600EliminateSymbolicOperand.cpp b/lib/Target/AMDGPU/R600EliminateSymbolicOperand.cpp
> new file mode 100644
> index 0000000..3d01582
> --- /dev/null
> +++ b/lib/Target/AMDGPU/R600EliminateSymbolicOperand.cpp
> @@ -0,0 +1,122 @@
> +//===-- R600EliminateSymbolicOperand.cpp - Eliminate Symbolic Operands-----===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// Before this pass backend can manipulate symbolic operands like
> +/// GlobalAddress for data read from Const Buffers or FrameIndex for stack
> +/// allocated array. This pass is used to change these operands by a value
> +/// that can be passed to MCInstrEmitter.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "AMDGPU.h"
> +#include "llvm/CodeGen/MachineFunction.h"
> +#include "llvm/CodeGen/MachineFunctionPass.h"
> +#include "R600InstrInfo.h"
> +#include "llvm/GlobalValue.h"
> +
> +namespace llvm {
> +
> +class R600EliminateSymbolicOperand : public MachineFunctionPass {
> +private:
> + static char ID;
> + const R600InstrInfo *TII;
> +public:
> + R600EliminateSymbolicOperand(TargetMachine &tm);
> + virtual bool runOnMachineFunction(MachineFunction &MF);
> +
> + const char *getPassName() const { return "R600 Eliminate Symbolic Operand"; }
> +};
> +
> +char R600EliminateSymbolicOperand::ID = 0;
> +
> +
> +R600EliminateSymbolicOperand::R600EliminateSymbolicOperand(TargetMachine &tm) :
> + MachineFunctionPass(ID),
> + TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo()))
> +{
> +}
> +
> +static unsigned
> +getConstBufferIdxFromName(StringRef Name) {
> + if (Name == "const0")
> + return 0;
> + if (Name == "const1")
> + return 1;
> + if (Name == "const2")
> + return 2;
> + if (Name == "const3")
> + return 3;
> + if (Name == "const4")
> + return 4;
> + if (Name == "const5")
> + return 5;
> + if (Name == "const6")
> + return 6;
> + if (Name == "const7")
> + return 7;
> + if (Name == "const8")
> + return 8;
> + if (Name == "const9")
> + return 9;
> + if (Name == "const10")
> + return 10;
> + if (Name == "const11")
> + return 11;
> + if (Name == "const12")
> + return 12;
> + if (Name == "const13")
> + return 13;
> + if (Name == "const14")
> + return 14;
> + if (Name == "const15")
> + return 15;
> +}
> +
> +bool R600EliminateSymbolicOperand::runOnMachineFunction(MachineFunction &MF) {
> + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
> + BB != BB_E; ++BB) {
> + MachineBasicBlock &MBB = *BB;
> + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
> + I != E; ++I) {
> + MachineInstr &MI = *I;
> + if (!TII->isALUInstr(MI.getOpcode()))
> + continue;
> + for (int i = 0; i < MI.getNumOperands(); i++) {
> + MachineOperand &MO = MI.getOperand(i);
> + if (MO.isGlobal()) {
> + unsigned ConstSel = MO.getOffset() / 4
> + + 2048 +
> + 16384 * getConstBufferIdxFromName(MO.getGlobal()->getName());
> + int SelIdx;
> + if (i == TII->getOperandIdx(MI, R600Operands::SRC0)) {
> + SelIdx = TII->getOperandIdx(MI, R600Operands::SRC0_SEL);
> + } else if (i == TII->getOperandIdx(MI, R600Operands::SRC1)) {
> + SelIdx = TII->getOperandIdx(MI, R600Operands::SRC1_SEL);
> + } else if (i == TII->getOperandIdx(MI, R600Operands::SRC2)) {
> + SelIdx = TII->getOperandIdx(MI, R600Operands::SRC2_SEL);
> + } else {
> + assert(0 && "Wrong operand location for Global Address");
> + }
> + MI.getOperand(SelIdx).setImm(ConstSel);
> + MO.ChangeToRegister(AMDGPU::ALU_CONST, false);
> + }
> + }
> + }
> + }
> + return false;
> +}
> +
> +FunctionPass *createR600EliminateSymbolicOperandPass(TargetMachine &tm) {
> + return new R600EliminateSymbolicOperand(tm);
> +}
> +
> +}
> +
> +
> diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
> index d09f8c0e..6ff99c3 100644
> --- a/lib/Target/AMDGPU/R600ISelLowering.cpp
> +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
> @@ -16,6 +16,7 @@
> #include "R600Defines.h"
> #include "R600InstrInfo.h"
> #include "R600MachineFunctionInfo.h"
> +#include "AMDGPURegisterInfo.h"
> #include "llvm/Argument.h"
> #include "llvm/CodeGen/MachineInstrBuilder.h"
> #include "llvm/CodeGen/MachineRegisterInfo.h"
> @@ -97,7 +98,12 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
> setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
> setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
> setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
> + setOperationAction(ISD::LOAD, MVT::f32, Custom);
> + setOperationAction(ISD::LOAD, MVT::i32, Custom);
> + setOperationAction(ISD::LOAD, MVT::v4f32, Custom);
> + setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
> setTargetDAGCombine(ISD::FP_ROUND);
> + setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
>
> setSchedulingPreference(Sched::VLIW);
> }
> @@ -138,13 +144,9 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
> break;
> }
>
> - case AMDGPU::R600_LOAD_CONST: {
> - int64_t RegIndex = MI->getOperand(1).getImm();
> - unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
> - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
> - .addOperand(MI->getOperand(0))
> - .addReg(ConstantReg);
> - break;
> + case AMDGPU::TEX_VTX_CONSTBUF: {
> + MI->getOperand(2).ChangeToImmediate(MI->getOperand(2).getOffset() * 16);
> + return BB;
> }
>
> case AMDGPU::MASK_WRITE: {
> @@ -417,6 +419,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
> case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
> case ISD::SELECT: return LowerSELECT(Op, DAG);
> case ISD::SETCC: return LowerSETCC(Op, DAG);
> + case ISD::LOAD: return LowerLOAD(Op, DAG);
> case ISD::FPOW: return LowerFPOW(Op, DAG);
> case ISD::INTRINSIC_VOID: {
> SDValue Chain = Op.getOperand(0);
> @@ -580,6 +583,16 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
> switch (N->getOpcode()) {
> default: return;
> case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
> + return;
> + case ISD::LOAD: {
> + SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
> + Results.push_back(SDValue(Node, 0));
> + Results.push_back(SDValue(Node, 1));
> + // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
> + // function
> + DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
> + return;
> + }
> }
> }
>
> @@ -861,6 +874,75 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
> return Cond;
> }
>
> +const SDValue PtrSRL(SDValue Addr, unsigned Amount, SelectionDAG &DAG) {
> + switch (Addr.getOpcode()) {
> + case ISD::GlobalAddress:
> + case ISD::BITCAST:
> + return Addr;
> + case ISD::ADD:
> + case ISD::OR:
> + return DAG.getNode(Addr.getOpcode(), Addr.getDebugLoc(), Addr.getValueType(),
> + PtrSRL(Addr.getOperand(0), Amount, DAG),
> + PtrSRL(Addr.getOperand(1), Amount, DAG));
> + case ISD::SHL: {
> + ConstantSDNode *SHLPad = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
> + assert(SHLPad && "Not Constant SHLPad!");
> + unsigned PadAmount = SHLPad->getZExtValue();
> + //We suppose PadAmout > Amount
> + return DAG.getNode(ISD::SHL, Addr.getDebugLoc(), Addr.getValueType(),
> + Addr.getOperand(0),
> + DAG.getConstant(PadAmount - Amount, MVT::i32));
> + }
> + case ISD::Constant: {
> + ConstantSDNode *SHLPad = dyn_cast<ConstantSDNode>(Addr);
> + return DAG.getConstant(SHLPad->getZExtValue() >> Amount, MVT::i32);
> + }
> + default:
> + assert(0 && "Invalid ptr format!");
> + }
> +}
> +
> +SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
> +{
Coding style: The opening brace must go on the same line as the function signature.
> + EVT VT = Op.getValueType();
> + DebugLoc DL = Op.getDebugLoc();
> + LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
> + SDValue Chain = Op.getOperand(0);
> + SDValue Ptr = Op.getOperand(1);
> + SDValue LoweredLoad;
> +
> + if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
> + SDValue Result;
> + if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue())) {
> + // Ptr is GA + Cst : it can be folded, turns it into 4x f32
> + SDValue Slots[4];
> + for (unsigned i = 0; i < 4; i++) {
> + SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
> + DAG.getConstant(4 * i, MVT::i32));
> + Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::f32, NewPtr);
> + }
> + Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, Slots, 4);
> + } else {
> + // Ptr is GA + Reg : it cant be folded, keeps it as a v4f32 load
> + Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4f32,
> + PtrSRL(Ptr, 4, DAG));
> + }
> +
> + if (!VT.isVector()) {
> + Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Result,
> + DAG.getConstant(0, MVT::i32));
> + }
> +
> + SDValue MergedValues[2] = {
> + Result,
> + Chain
> + };
> + return DAG.getMergeValues(MergedValues, 2, DL);
> + }
> +
> + return SDValue();
> +}
> +
> SDValue R600TargetLowering::LowerFPOW(SDValue Op,
> SelectionDAG &DAG) const {
> DebugLoc DL = Op.getDebugLoc();
> @@ -918,6 +1000,17 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
> }
> break;
> }
> + // Extract_vec (Build_vector) generated by custom lowering
> + // also needs to be customly combined
> + case ISD::EXTRACT_VECTOR_ELT: {
> + SDValue Arg = N->getOperand(0);
> + if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
> + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
> + unsigned Element = Const->getZExtValue();
> + return Arg->getOperand(Element);
> + }
> + }
> + }
> }
> return SDValue();
> }
> diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h
> index bdb0a55..70ece84 100644
> --- a/lib/Target/AMDGPU/R600ISelLowering.h
> +++ b/lib/Target/AMDGPU/R600ISelLowering.h
> @@ -62,6 +62,7 @@ private:
> SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
> + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
>
> bool isZero(SDValue Op) const;
> };
> diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
> index a60a180..6c1c50a 100644
> --- a/lib/Target/AMDGPU/R600InstrInfo.cpp
> +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
> @@ -484,13 +484,15 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB
> .addReg(Src0Reg) // $src0
> .addImm(0) // $src0_neg
> .addImm(0) // $src0_rel
> - .addImm(0); // $src0_abs
> + .addImm(0) // $src0_abs
> + .addImm(0); // $src0_sel
>
> if (Src1Reg) {
> MIB.addReg(Src1Reg) // $src1
> .addImm(0) // $src1_neg
> .addImm(0) // $src1_rel
> - .addImm(0); // $src1_abs
> + .addImm(0) // $src1_abs
> + .addImm(0); // $src1_sel
> }
>
> //XXX: The r600g finalizer expects this to be 1, once we've moved the
> @@ -519,16 +521,6 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
>
> int R600InstrInfo::getOperandIdx(unsigned Opcode,
> R600Operands::Ops Op) const {
> - const static int OpTable[3][R600Operands::COUNT] = {
> -// W C S S S S S S S S
> -// R O D L S R R R S R R R S R R L P
> -// D U I M R A R C C C C C C C R C C A R I
> -// S E U T O E M C 0 0 0 C 1 1 1 C 2 2 S E M
> -// T M P E D L P 0 N R A 1 N R A 2 N R T D M
> - {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8,-1,-1,-1,-1,-1,-1,-1, 9,10,11},
> - {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17},
> - {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14}
> - };
> unsigned TargetFlags = get(Opcode).TSFlags;
> unsigned OpTableIdx;
>
> @@ -554,7 +546,7 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode,
> OpTableIdx = 2;
> }
>
> - return OpTable[OpTableIdx][Op];
> + return R600Operands::ALUOpTable[OpTableIdx][Op];
> }
>
> void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
> diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
> index dc32368..691b1c8 100644
> --- a/lib/Target/AMDGPU/R600Instructions.td
> +++ b/lib/Target/AMDGPU/R600Instructions.td
> @@ -69,6 +69,8 @@ class InstFlag<string PM = "printOperand", int Default = 0>
> let PrintMethod = PM;
> }
>
> +def SEL : OperandWithDefaultOps <i32, (ops (i32 0))>;
> +
> def LITERAL : InstFlag<"printLiteral">;
>
> def WRITE : InstFlag <"printWrite", 1>;
> @@ -88,6 +90,8 @@ def LAST : InstFlag<"printLast", 1>;
> def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
> def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
> def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
> +def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
> +def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
>
> class R600ALU_Word0 {
> field bits<32> Word0;
> @@ -262,7 +266,7 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
> InstR600 <0,
> (outs R600_Reg32:$dst),
> (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
> - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
> + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
> LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
> !strconcat(opName,
> "$clamp $dst$write$dst_rel$omod, "
> @@ -302,8 +306,8 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
> (outs R600_Reg32:$dst),
> (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
> OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
> - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
> - R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs,
> + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
> + R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
> LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
> !strconcat(opName,
> "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
> @@ -339,9 +343,9 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
> InstR600 <0,
> (outs R600_Reg32:$dst),
> (ins REL:$dst_rel, CLAMP:$clamp,
> - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel,
> - R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel,
> - R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel,
> + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
> + R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
> + R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
> LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
> !strconcat(opName, "$clamp $dst$dst_rel, "
> "$src0_neg$src0$src0_rel, "
> @@ -1596,6 +1600,74 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
> "RETURN", [(IL_retflag)]>;
> }
>
> +
> +//===----------------------------------------------------------------------===//
> +// Constant Buffer Addressing Support
> +//===----------------------------------------------------------------------===//
> +
> +def CONST_COPY : R600_1OP <0x19, "CONST_COPY",
> + [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src0))]>
> +{
> + let mayLoad = 1;
> +}
> +
> +def TEX_VTX_CONSTBUF :
> + InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr",
> + [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>,
> + VTX_WORD1_GPR, VTX_WORD0 {
> +
> + let usesCustomInserter = 1;
> +
> + let VC_INST = 0;
> + let FETCH_TYPE = 2;
> + let FETCH_WHOLE_QUAD = 0;
> + let BUFFER_ID = 0;
> + let SRC_REL = 0;
> + let SRC_SEL_X = 0;
> + let DST_REL = 0;
> + let USE_CONST_FIELDS = 0;
> + let NUM_FORMAT_ALL = 2;
> + let FORMAT_COMP_ALL = 1;
> + let SRF_MODE_ALL = 1;
> + let MEGA_FETCH_COUNT = 16;
> + let DST_SEL_X = 0;
> + let DST_SEL_Y = 1;
> + let DST_SEL_Z = 2;
> + let DST_SEL_W = 3;
> + let DATA_FORMAT = 35;
> +
> + let Inst{31-0} = Word0;
> + let Inst{63-32} = Word1;
> +
> +// LLVM can only encode 64-bit instructions, so these fields are manually
> +// encoded in R600CodeEmitter
> +//
> +// bits<16> OFFSET;
> +// bits<2> ENDIAN_SWAP = 0;
> +// bits<1> CONST_BUF_NO_STRIDE = 0;
> +// bits<1> MEGA_FETCH = 0;
> +// bits<1> ALT_CONST = 0;
> +// bits<2> BUFFER_INDEX_MODE = 0;
> +
> +
> +
> +// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
> +// is done in R600CodeEmitter
> +//
> +// Inst{79-64} = OFFSET;
> +// Inst{81-80} = ENDIAN_SWAP;
> +// Inst{82} = CONST_BUF_NO_STRIDE;
> +// Inst{83} = MEGA_FETCH;
> +// Inst{84} = ALT_CONST;
> +// Inst{86-85} = BUFFER_INDEX_MODE;
> +// Inst{95-86} = 0; Reserved
> +
> +// VTX_WORD3 (Padding)
> +//
> +// Inst{127-96} = 0;
> +}
> +
> +
> //===----------------------------------------------------------------------===//
> // ISel Patterns
> //===----------------------------------------------------------------------===//
> diff --git a/lib/Target/AMDGPU/R600RegisterInfo.cpp b/lib/Target/AMDGPU/R600RegisterInfo.cpp
> index a39f83d..397fbaf 100644
> --- a/lib/Target/AMDGPU/R600RegisterInfo.cpp
> +++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp
> @@ -15,6 +15,7 @@
> #include "R600RegisterInfo.h"
> #include "AMDGPUTargetMachine.h"
> #include "R600Defines.h"
> +#include "R600InstrInfo.h"
> #include "R600MachineFunctionInfo.h"
>
> using namespace llvm;
> @@ -38,16 +39,12 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
> Reserved.set(AMDGPU::NEG_ONE);
> Reserved.set(AMDGPU::PV_X);
> Reserved.set(AMDGPU::ALU_LITERAL_X);
> + Reserved.set(AMDGPU::ALU_CONST);
> Reserved.set(AMDGPU::PREDICATE_BIT);
> Reserved.set(AMDGPU::PRED_SEL_OFF);
> Reserved.set(AMDGPU::PRED_SEL_ZERO);
> Reserved.set(AMDGPU::PRED_SEL_ONE);
>
> - for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
> - E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
> - Reserved.set(*I);
> - }
> -
> for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
> E = MFI->ReservedRegs.end(); I != E; ++I) {
> Reserved.set(*I);
> diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td b/lib/Target/AMDGPU/R600RegisterInfo.td
> index d3d6d25..10fe858 100644
> --- a/lib/Target/AMDGPU/R600RegisterInfo.td
> +++ b/lib/Target/AMDGPU/R600RegisterInfo.td
> @@ -28,9 +28,6 @@ foreach Index = 0-127 in {
> // 32-bit Temporary Registers
> def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
>
> - // 32-bit Constant Registers (There are more than 128, this the number
> - // that is currently supported.
> - def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>;
> }
> // 128-bit Temporary Registers
> def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
> @@ -46,7 +43,6 @@ foreach Index = 448-464 in {
> def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>;
> }
>
> -
> // Special Registers
>
> def ZERO : R600Reg<"0.0", 248>;
> @@ -61,16 +57,11 @@ def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
> def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
> def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
> def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
> +def ALU_CONST : R600Reg<"Const", 0>;
>
> def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
> (add (sequence "ArrayBase%u", 448, 464))>;
>
> -def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
> - (add (interleave
> - (interleave (sequence "C%u_X", 0, 127),
> - (sequence "C%u_Z", 0, 127)),
> - (interleave (sequence "C%u_Y", 0, 127),
> - (sequence "C%u_W", 0, 127))))>;
>
> def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
> (add (sequence "T%u_X", 0, 127))>;
> @@ -91,7 +82,6 @@ def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
>
> def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
> R600_TReg32,
> - R600_CReg32,
> R600_ArrayBase,
> ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
>
> --
> 1.8.0.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list