[Mesa-dev] [PATCH 1/2] R600: Fold remaining CONST_COPY after expand pseudo inst
Tom Stellard
tom at stellard.net
Wed Jan 23 13:26:08 PST 2013
On Wed, Jan 23, 2013 at 09:21:14PM +0100, Vincent Lejeune wrote:
> v2:fix a bug with write masked inst
I usually only version patches if I've already submitted a previous version to
the mailiing list, so if you haven't submitted a version of this patch before
you don't need this. If this is really a v2, then you should also add v2
to the first line of the commit message.
> ---
> lib/Target/R600/AMDGPUTargetMachine.cpp | 2 +-
> lib/Target/R600/R600LowerConstCopy.cpp | 164 +++++++++++++++++++++++++++++---
> 2 files changed, 154 insertions(+), 12 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
> index 7b069e7..2185be3 100644
> --- a/lib/Target/R600/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
> @@ -136,8 +136,8 @@ bool AMDGPUPassConfig::addPreEmitPass() {
> addPass(createAMDGPUCFGPreparationPass(*TM));
> addPass(createAMDGPUCFGStructurizerPass(*TM));
> addPass(createR600ExpandSpecialInstrsPass(*TM));
> - addPass(createR600LowerConstCopy(*TM));
> addPass(&FinalizeMachineBundlesID);
> + addPass(createR600LowerConstCopy(*TM));
> } else {
> addPass(createSILowerLiteralConstantsPass(*TM));
> addPass(createSILowerControlFlowPass(*TM));
> diff --git a/lib/Target/R600/R600LowerConstCopy.cpp b/lib/Target/R600/R600LowerConstCopy.cpp
> index d14ae20..74260ad 100644
> --- a/lib/Target/R600/R600LowerConstCopy.cpp
> +++ b/lib/Target/R600/R600LowerConstCopy.cpp
> @@ -13,7 +13,6 @@
> /// fold them inside vector instruction, like DOT4 or Cube ; ISel emits
> /// ConstCopy instead. This pass (executed after ExpandingSpecialInstr) will try
> /// to fold them if possible or replace them by MOV otherwise.
> -/// TODO : Implement the folding part, using Copy Propagation algorithm.
> //
> //===----------------------------------------------------------------------===//
>
> @@ -28,8 +27,16 @@ namespace llvm {
>
> class R600LowerConstCopy : public MachineFunctionPass {
> private:
> + typedef DenseMap<unsigned, MachineInstr *> SourceMap;
I'm not a huge fan of typedefs like this, I know it may save some typing,
but I'd rather see the full type name everywhere so I know for sure what
the data structure is.
> static char ID;
> const R600InstrInfo *TII;
> +
> + struct ConstPairs {
> + unsigned XYPair;
> + unsigned ZWPair;
> + };
> +
> + bool canFoldInBundle(ConstPairs &UsedConst, unsigned ReadConst) const;
> public:
> R600LowerConstCopy(TargetMachine &tm);
> virtual bool runOnMachineFunction(MachineFunction &MF);
> @@ -39,27 +46,162 @@ public:
>
> char R600LowerConstCopy::ID = 0;
>
> -
> R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm) :
> MachineFunctionPass(ID),
> TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo()))
> {
> }
>
> +bool R600LowerConstCopy::canFoldInBundle(ConstPairs &UsedConst,
> + unsigned ReadConst) const {
> + unsigned ReadConstChan = ReadConst & 3;
> + unsigned ReadConstIndex = ReadConst & (~3);
> + if (ReadConstChan < 2) {
> + if (!UsedConst.XYPair) {
> + UsedConst.XYPair = ReadConstIndex;
> + }
> + return UsedConst.XYPair == ReadConstIndex;
> + } else {
> + if (!UsedConst.ZWPair) {
> + UsedConst.ZWPair = ReadConstIndex;
> + }
> + return UsedConst.ZWPair == ReadConstIndex;
> + }
> +}
> +
> +static bool isControlFlow(const MachineInstr &MI) {
> + return (MI.getOpcode() == AMDGPU::IF_PREDICATE_SET) ||
> + (MI.getOpcode() == AMDGPU::ENDIF) ||
> + (MI.getOpcode() == AMDGPU::ELSE) ||
> + (MI.getOpcode() == AMDGPU::WHILELOOP) ||
> + (MI.getOpcode() == AMDGPU::BREAK);
> +}
> +
> bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) {
> +
> for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
> BB != BB_E; ++BB) {
> MachineBasicBlock &MBB = *BB;
> - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
> - I != E;) {
> - MachineInstr &MI = *I;
> - I = llvm::next(I);
> - if (MI.getOpcode() != AMDGPU::CONST_COPY)
> + SourceMap RegToConstIndex;
> + for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
> + E = MBB.instr_end(); I != E;) {
> +
> + if (I->getOpcode() == AMDGPU::CONST_COPY) {
> + MachineInstr &MI = *I;
> + I = llvm::next(I);
> + unsigned DstReg = MI.getOperand(0).getReg();
> + SourceMap::iterator SrcMI = RegToConstIndex.find(DstReg);
> + if (SrcMI != RegToConstIndex.end()) {
> + SrcMI->second->eraseFromParent();
> + RegToConstIndex.erase(SrcMI);
> + }
> + MachineInstr *NewMI =
> + TII->buildDefaultInstruction(MBB, &MI, AMDGPU::MOV,
> + MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
> + NewMI->getOperand(9).setImm(MI.getOperand(1).getImm());
There is a helper function R600InstrInfo::setImmOperand(), that you can
use here and other places where you have similar code.
> + RegToConstIndex[DstReg] = NewMI;
> + MI.eraseFromParent();
> continue;
> - MachineInstr *NewMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV,
> - MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
> - NewMI->getOperand(9).setImm(MI.getOperand(1).getImm());
> - MI.eraseFromParent();
> + }
> +
> + std::vector<unsigned> Defs;
> + // We consider all Instructions as bundled because algorithm that handle
> + // const read port limitations inside an IG is still valid with single
> + // instructions.
> + std::vector<MachineInstr *> Bundle;
> +
> + if (I->isBundle()) {
> + unsigned BundleSize = I->getBundleSize();
> + for (unsigned i = 0; i < BundleSize; i++) {
> + I = llvm::next(I);
> + Bundle.push_back(I);
> + }
> + } else if (TII->isALUInstr(I->getOpcode())){
> + Bundle.push_back(I);
> + } else if (isControlFlow(*I)) {
> + RegToConstIndex.clear();
> + I = llvm::next(I);
> + continue;
> + } else {
> + MachineInstr &MI = *I;
> + for (MachineInstr::mop_iterator MOp = MI.operands_begin(),
> + MOpE = MI.operands_end(); MOp != MOpE; ++MOp) {
> + MachineOperand &MO = *MOp;
> + if (!MO.isReg())
> + continue;
> + if (MO.isDef()) {
> + Defs.push_back(MO.getReg());
> + } else {
> + // Either a TEX or an Export inst, prevent from erasing def of used
> + // operand
> + RegToConstIndex.erase(MO.getReg());
> + for (MCSubRegIterator SR(MO.getReg(), &TII->getRegisterInfo());
> + SR.isValid(); ++SR) {
> + RegToConstIndex.erase(*SR);
> + }
> + }
> + }
> + }
> +
> +
> + R600Operands::Ops OpTable[3][2] = {
> + {R600Operands::SRC0, R600Operands::SRC0_SEL},
> + {R600Operands::SRC1, R600Operands::SRC1_SEL},
> + {R600Operands::SRC2, R600Operands::SRC2_SEL},
> + };
> +
> + for(std::vector<MachineInstr *>::iterator It = Bundle.begin(),
> + ItE = Bundle.end(); It != ItE; ++It) {
> + MachineInstr *MI = *It;
> + if (TII->isPredicated(MI)) {
> + // We don't want to erase previous assignment
> + RegToConstIndex.erase(MI->getOperand(0).getReg());
> + } else {
> + int WriteIDX = TII->getOperandIdx(MI->getOpcode(), R600Operands::WRITE);
> + if (WriteIDX < 0 || MI->getOperand(WriteIDX).getImm())
> + Defs.push_back(MI->getOperand(0).getReg());
> + }
> + }
> +
> + ConstPairs CP = {0,0};
> + for (unsigned SrcOp = 0; SrcOp < 3; SrcOp++) {
> + for(std::vector<MachineInstr *>::iterator It = Bundle.begin(),
> + ItE = Bundle.end(); It != ItE; ++It) {
> + MachineInstr *MI = *It;
> + int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[SrcOp][0]);
> + if (SrcIdx < 0)
> + continue;
> + MachineOperand &MO = MI->getOperand(SrcIdx);
> + SourceMap::iterator SrcMI = RegToConstIndex.find(MO.getReg());
> + if (SrcMI != RegToConstIndex.end()) {
> + int SrcSel = TII->getOperandIdx(MI->getOpcode(), OpTable[SrcOp][1]);
> + unsigned ConstIndex = SrcMI->second->getOperand(9).getImm();
You can use teh setImmOperand helper function here too.
> + if (canFoldInBundle(CP, ConstIndex)) {
> + MI->getOperand(SrcSel).setImm(ConstIndex);
> + MI->getOperand(SrcIdx).setReg(AMDGPU::ALU_CONST);
> + } else {
> + RegToConstIndex.erase(SrcMI);
> + }
> + }
> + }
> + }
> +
> + for (std::vector<unsigned>::iterator It = Defs.begin(), ItE = Defs.end();
> + It != ItE; ++It) {
> + SourceMap::iterator SrcMI = RegToConstIndex.find(*It);
> + if (SrcMI != RegToConstIndex.end()) {
> + SrcMI->second->eraseFromParent();
> + RegToConstIndex.erase(SrcMI);
> + }
> + }
> + I = llvm::next(I);
> + }
> +
> + if (MBB.succ_empty()) {
> + for (SourceMap::iterator DI = RegToConstIndex.begin(),
> + DE = RegToConstIndex.end(); DI != DE; ++DI) {
> + DI->second->eraseFromParent();
> + }
> }
> }
> return false;
> --
> 1.8.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list