[Mesa-dev] [PATCH 6/6] R600: initial scheduler code
Tom Stellard
tom at stellard.net
Tue Feb 19 07:34:54 PST 2013
Hi Vincent,
From now on, please cc llvm-commits at cs.uiuc.edu when you submit a patch.
I'm cc'ing that list now.
This looks OK to me at first glance, but I would like to test it with
compute shaders before you merge it.
On Mon, Feb 18, 2013 at 05:27:30PM +0100, Vincent Lejeune wrote:
> From: Vadim Girlin <vadimgirlin at gmail.com>
>
> This is a skeleton for a pre-RA MachineInstr scheduler strategy. Currently
> it only tries to expose more parallelism for ALU instructions (this also
> makes the distribution of GPR channels more uniform and increases the
> chances of ALU instructions to be packed together in a single VLIW group).
> Also it tries to reduce clause switching by grouping instructions of the
> same kind (ALU/FETCH/CF) together.
>
> Vincent Lejeune:
> - Support for VLIW4 slot assignment
> - Recomputation of ScheduleDAG to get more parallelism opportunities
>
> Tom Stellard:
> - Fix assertion failure when trying to determine an instruction's slot
> based on its destination register's class
> - Fix some compiler warnings
>
> Vincent Lejeune: [v2]
> - Remove recomputation of ScheduleDAG (will be provided in a later patch)
> - Improve estimation of an ALU clause's size so that the heuristic does not
> emit CF instructions at the wrong position.
> - Make the scheduling heuristic smarter using SUnit Depth
> - Take constant read limitations into account
> ---
> lib/Target/R600/AMDGPUTargetMachine.cpp | 17 +-
> lib/Target/R600/R600MachineScheduler.cpp | 483 +++++++++++++++++++++++++++++++
> lib/Target/R600/R600MachineScheduler.h | 121 ++++++++
> test/CodeGen/R600/fdiv.v4f32.ll | 6 +-
> 4 files changed, 623 insertions(+), 4 deletions(-)
> create mode 100644 lib/Target/R600/R600MachineScheduler.cpp
> create mode 100644 lib/Target/R600/R600MachineScheduler.h
>
> diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
> index 70b34b0..eb58853 100644
> --- a/lib/Target/R600/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
> @@ -17,6 +17,7 @@
> #include "AMDGPU.h"
> #include "R600ISelLowering.h"
> #include "R600InstrInfo.h"
> +#include "R600MachineScheduler.h"
> #include "SIISelLowering.h"
> #include "SIInstrInfo.h"
> #include "llvm/Analysis/Passes.h"
> @@ -39,6 +40,14 @@ extern "C" void LLVMInitializeR600Target() {
> RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
> }
>
> +static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
> + return new ScheduleDAGMI(C, new R600SchedStrategy());
> +}
> +
> +static MachineSchedRegistry
> +SchedCustomRegistry("r600", "Run R600's custom scheduler",
> + createR600MachineScheduler);
> +
> AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
> StringRef CPU, StringRef FS,
> TargetOptions Options,
> @@ -70,7 +79,13 @@ namespace {
> class AMDGPUPassConfig : public TargetPassConfig {
> public:
> AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
> - : TargetPassConfig(TM, PM) {}
> + : TargetPassConfig(TM, PM) {
> + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
> + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
> + enablePass(&MachineSchedulerID);
> + MachineSchedRegistry::setDefault(createR600MachineScheduler);
> + }
> + }
>
> AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
> return getTM<AMDGPUTargetMachine>();
> diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
> new file mode 100644
> index 0000000..efd9490
> --- /dev/null
> +++ b/lib/Target/R600/R600MachineScheduler.cpp
> @@ -0,0 +1,483 @@
> +//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief R600 Machine Scheduler interface
> +// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#define DEBUG_TYPE "misched"
> +
> +#include "R600MachineScheduler.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/CodeGen/LiveIntervalAnalysis.h"
> +#include "llvm/Pass.h"
> +#include "llvm/PassManager.h"
> +#include <set>
> +#include <iostream>
> +using namespace llvm;
> +
> +void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
> +
> + DAG = dag;
> + TII = static_cast<const R600InstrInfo*>(DAG->TII);
> + TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
> + MRI = &DAG->MRI;
> + Available[IDAlu]->clear();
> + Available[IDFetch]->clear();
> + Available[IDOther]->clear();
> + CurInstKind = IDOther;
> + CurEmitted = 0;
> + memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
> + InstKindLimit[IDAlu] = 120; // 120 minus 8 for security
> +
> +
> + const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
> + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) {
> + InstKindLimit[IDFetch] = 7; // 8 minus 1 for security
> + } else {
> + InstKindLimit[IDFetch] = 15; // 16 minus 1 for security
> + }
> +}
> +
> +void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst)
> +{
> + if (QSrc->empty())
> + return;
> + for (ReadyQueue::iterator I = QSrc->begin(),
> + E = QSrc->end(); I != E; ++I) {
> + (*I)->NodeQueueId &= ~QSrc->getID();
> + QDst->push(*I);
> + }
> + QSrc->clear();
> +}
> +
> +SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
> + SUnit *SU = 0;
> + IsTopNode = true;
> + NextInstKind = IDOther;
> +
> + // check if we might want to switch current clause type
> + bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
> + (CurEmitted > InstKindLimit[CurInstKind]) ||
> + (Available[CurInstKind]->empty());
> + bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
> + (!Available[IDFetch]->empty() || !Available[IDOther]->empty());
> +
> + if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
> + (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
> + // try to pick ALU
> + SU = pickAlu();
> + if (SU)
> + if (CurEmitted > InstKindLimit[IDAlu])
> + CurEmitted = 0;
> + NextInstKind = IDAlu;
> + }
> +
> + if (!SU) {
> + // try to pick FETCH
> + SU = pickOther(IDFetch);
> + if (SU)
> + NextInstKind = IDFetch;
> + }
> +
> + // try to pick other
> + if (!SU) {
> + SU = pickOther(IDOther);
> + if (SU)
> + NextInstKind = IDOther;
> + }
> +
> + DEBUG(
> + if (SU) {
> + dbgs() << "picked node: ";
> + SU->dump(DAG);
> + } else {
> + dbgs() << "NO NODE ";
> + for (int i = 0; i < IDLast; ++i) {
> + Available[i]->dump();
> + Pending[i]->dump();
> + }
> + for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
> + const SUnit &S = DAG->SUnits[i];
> + if (!S.isScheduled)
> + S.dump(DAG);
> + }
> + }
> + );
> +
> + return SU;
> +}
> +
> +void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
> +
> + DEBUG(dbgs() << "scheduled: ");
> + DEBUG(SU->dump(DAG));
> +
> + if (NextInstKind != CurInstKind) {
> + DEBUG(dbgs() << "Instruction Type Switch\n");
> + if (NextInstKind != IDAlu)
> + OccupedSlotsMask = 0;
> + CurEmitted = 0;
> + CurInstKind = NextInstKind;
> + }
> +
> + if (CurInstKind == IDAlu) {
> + switch (getAluKind(SU)) {
> + case AluT_XYZW:
> + CurEmitted += 4;
> + break;
> + case AluDiscarded:
> + break;
> + default: {
> + ++CurEmitted;
> + for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
> + E = SU->getInstr()->operands_end(); It != E; ++It) {
> + MachineOperand &MO = *It;
> + if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
> + ++CurEmitted;
> + }
> + }
> + }
> + } else {
> + ++CurEmitted;
> + }
> +
> +
> + DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
> +
> + if (CurInstKind != IDFetch) {
> + MoveUnits(Pending[IDFetch], Available[IDFetch]);
> + }
> + MoveUnits(Pending[IDOther], Available[IDOther]);
> +}
> +
> +void R600SchedStrategy::releaseTopNode(SUnit *SU) {
> + int IK = getInstKind(SU);
> +
> + DEBUG(dbgs() << IK << " <= ");
> + DEBUG(SU->dump(DAG));
> +
> + Pending[IK]->push(SU);
> +}
> +
> +void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
> +}
> +
> +bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
> + const TargetRegisterClass *RC) const {
> + if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
> + return RC->contains(Reg);
> + } else {
> + return MRI->getRegClass(Reg) == RC;
> + }
> +}
> +
> +R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
> + MachineInstr *MI = SU->getInstr();
> +
> + switch (MI->getOpcode()) {
> + case AMDGPU::INTERP_PAIR_XY:
> + case AMDGPU::INTERP_PAIR_ZW:
> + case AMDGPU::INTERP_VEC_LOAD:
> + return AluT_XYZW;
> + case AMDGPU::COPY:
> + if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
> + // %vregX = COPY Tn_X is likely to be discarded in favor of an
> + // assignement of Tn_X to %vregX, don't considers it in scheduling
> + return AluDiscarded;
> + }
> + else if (MI->getOperand(1).isUndef()) {
> + // MI will become a KILL, don't considers it in scheduling
> + return AluDiscarded;
> + }
> + default:
> + break;
> + }
> +
> + // Does the instruction take a whole IG ?
> + if(TII->isVector(*MI) ||
> + TII->isCubeOp(MI->getOpcode()) ||
> + TII->isReductionOp(MI->getOpcode()))
> + return AluT_XYZW;
> +
> + // Is the result already assigned to a channel ?
> + unsigned DestSubReg = MI->getOperand(0).getSubReg();
> + switch (DestSubReg) {
> + case AMDGPU::sub0:
> + return AluT_X;
> + case AMDGPU::sub1:
> + return AluT_Y;
> + case AMDGPU::sub2:
> + return AluT_Z;
> + case AMDGPU::sub3:
> + return AluT_W;
> + default:
> + break;
> + }
> +
> + // Is the result already member of a X/Y/Z/W class ?
> + unsigned DestReg = MI->getOperand(0).getReg();
> + if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
> + regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
> + return AluT_X;
> + if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
> + return AluT_Y;
> + if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
> + return AluT_Z;
> + if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
> + return AluT_W;
> + if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
> + return AluT_XYZW;
> +
> + return AluAny;
> +
> +}
> +
> +int R600SchedStrategy::getInstKind(SUnit* SU) {
> + int Opcode = SU->getInstr()->getOpcode();
> +
> + if (TII->isALUInstr(Opcode)) {
> + return IDAlu;
> + }
> +
> + switch (Opcode) {
> + case AMDGPU::COPY:
> + case AMDGPU::CONST_COPY:
> + case AMDGPU::INTERP_PAIR_XY:
> + case AMDGPU::INTERP_PAIR_ZW:
> + case AMDGPU::INTERP_VEC_LOAD:
> + case AMDGPU::DOT4_eg_pseudo:
> + case AMDGPU::DOT4_r600_pseudo:
> + return IDAlu;
> + case AMDGPU::TEX_VTX_CONSTBUF:
> + case AMDGPU::TEX_VTX_TEXBUF:
> + case AMDGPU::TEX_LD:
> + case AMDGPU::TEX_GET_TEXTURE_RESINFO:
> + case AMDGPU::TEX_GET_GRADIENTS_H:
> + case AMDGPU::TEX_GET_GRADIENTS_V:
> + case AMDGPU::TEX_SET_GRADIENTS_H:
> + case AMDGPU::TEX_SET_GRADIENTS_V:
> + case AMDGPU::TEX_SAMPLE:
> + case AMDGPU::TEX_SAMPLE_C:
> + case AMDGPU::TEX_SAMPLE_L:
> + case AMDGPU::TEX_SAMPLE_C_L:
> + case AMDGPU::TEX_SAMPLE_LB:
> + case AMDGPU::TEX_SAMPLE_C_LB:
> + case AMDGPU::TEX_SAMPLE_G:
> + case AMDGPU::TEX_SAMPLE_C_G:
> + case AMDGPU::TXD:
> + case AMDGPU::TXD_SHADOW:
> + return IDFetch;
> + default:
> + DEBUG(
> + dbgs() << "other inst: ";
> + SU->dump(DAG);
> + );
> + return IDOther;
> + }
> +}
> +
> +class ConstPairs {
> +private:
> + unsigned XYPair;
> + unsigned ZWPair;
> +public:
> + ConstPairs(unsigned ReadConst[3]) {
> + for (unsigned i = 0; i < 3; i++) {
> + unsigned ReadConstChan = ReadConst[i] & 3;
> + unsigned ReadConstIndex = ReadConst[i] & (~3);
> + if (ReadConstChan < 2) {
> + if (!XYPair) {
> + XYPair = ReadConstIndex;
> + }
> + } else {
> + if (!ZWPair) {
> + ZWPair = ReadConstIndex;
> + }
> + }
> + }
> + }
> +
> + bool isCompatibleWith(const ConstPairs& CP) const {
> + return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) &&
> + (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair);
> + }
> +};
> +
> +static
> +const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) {
> + unsigned ReadConsts[3] = {0, 0, 0};
> + R600Operands::Ops OpTable[3][2] = {
> + {R600Operands::SRC0, R600Operands::SRC0_SEL},
> + {R600Operands::SRC1, R600Operands::SRC1_SEL},
> + {R600Operands::SRC2, R600Operands::SRC2_SEL},
> + };
> +
> + if (!TII->isALUInstr(MI.getOpcode()))
> + return ConstPairs(ReadConsts);
> +
> + for (unsigned i = 0; i < 3; i++) {
> + int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]);
> + if (SrcIdx < 0)
> + break;
> + if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST)
> + ReadConsts[i] =MI.getOperand(
> + TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm();
> + }
> + return ConstPairs(ReadConsts);
> +}
> +
> +bool
> +R600SchedStrategy::isBundleable(const MachineInstr& MI) {
> + const ConstPairs &MIPair = getPairs(TII, MI);
> + for (unsigned i = 0; i < 4; i++) {
> + if (!InstructionsGroupCandidate[i])
> + continue;
> + const ConstPairs &IGPair = getPairs(TII,
> + *InstructionsGroupCandidate[i]->getInstr());
> + if (!IGPair.isCompatibleWith(MIPair))
> + return false;
> + }
> + return true;
> +}
> +
> +SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
> + if (Q.empty())
> + return NULL;
> + for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end();
> + It != E; ++It) {
> + SUnit *SU = *It;
> + if (isBundleable(*SU->getInstr())) {
> + Q.erase(It);
> + return SU;
> + }
> + }
> + return NULL;
> +}
> +
> +void R600SchedStrategy::LoadAlu() {
> + ReadyQueue *QSrc = Pending[IDAlu];
> + for (ReadyQueue::iterator I = QSrc->begin(),
> + E = QSrc->end(); I != E; ++I) {
> + (*I)->NodeQueueId &= ~QSrc->getID();
> + AluKind AK = getAluKind(*I);
> + AvailableAlus[AK].insert(*I);
> + }
> + QSrc->clear();
> +}
> +
> +void R600SchedStrategy::PrepareNextSlot() {
> + DEBUG(dbgs() << "New Slot\n");
> + OccupedSlotsMask = 0;
> + memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
> + LoadAlu();
> +}
> +
> +void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
> + unsigned DestReg = MI->getOperand(0).getReg();
> + // PressureRegister crashes if an operand is def and used in the same inst
> + // and we try to constraint its regclass
> + for (MachineInstr::mop_iterator It = MI->operands_begin(),
> + E = MI->operands_end(); It != E; ++It) {
> + MachineOperand &MO = *It;
> + if (MO.isReg() && !MO.isDef() &&
> + MO.getReg() == MI->getOperand(0).getReg())
> + return;
> + }
> + // Constrains the regclass of DestReg to assign it to Slot
> + switch (Slot) {
> + case 0:
> + MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
> + break;
> + case 1:
> + MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
> + break;
> + case 2:
> + MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
> + break;
> + case 3:
> + MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
> + break;
> + }
> +}
> +
> +SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
> + static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
> + SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
> + SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
> + if (!UnslotedSU) {
> + return SlotedSU;
> + } else if (!SlotedSU) {
> + AssignSlot(UnslotedSU->getInstr(), Slot);
> + return UnslotedSU;
> + } else {
> + //Determine which one to pick (the lesser one)
> + if (CompareSUnit()(SlotedSU, UnslotedSU)) {
> + AvailableAlus[AluAny].insert(UnslotedSU);
> + return SlotedSU;
> + } else {
> + AvailableAlus[IndexToID[Slot]].insert(SlotedSU);
> + AssignSlot(UnslotedSU->getInstr(), Slot);
> + return UnslotedSU;
> + }
> + }
> +}
> +
> +bool R600SchedStrategy::isAvailablesAluEmpty() const {
> + return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() &&
> + AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
> + AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
> + AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty();
> +}
> +
> +SUnit* R600SchedStrategy::pickAlu() {
> + while (!isAvailablesAluEmpty()) {
> + if (!OccupedSlotsMask) {
> + // Flush physical reg copies (RA will discard them)
> + if (!AvailableAlus[AluDiscarded].empty()) {
> + return PopInst(AvailableAlus[AluDiscarded]);
> + }
> + // If there is a T_XYZW alu available, use it
> + if (!AvailableAlus[AluT_XYZW].empty()) {
> + OccupedSlotsMask = 15;
> + return PopInst(AvailableAlus[AluT_XYZW]);
> + }
> + }
> + for (unsigned Chan = 0; Chan < 4; ++Chan) {
> + bool isOccupied = OccupedSlotsMask & (1 << Chan);
> + if (!isOccupied) {
> + SUnit *SU = AttemptFillSlot(Chan);
> + if (SU) {
> + OccupedSlotsMask |= (1 << Chan);
> + InstructionsGroupCandidate[Chan] = SU;
> + return SU;
> + }
> + }
> + }
> + PrepareNextSlot();
> + }
> + return NULL;
> +}
> +
> +SUnit* R600SchedStrategy::pickOther(int QID) {
> + SUnit *SU = 0;
> + ReadyQueue *AQ = Available[QID];
> +
> + if (AQ->empty()) {
> + MoveUnits(Pending[QID], AQ);
> + }
> + if (!AQ->empty()) {
> + SU = *AQ->begin();
> + AQ->remove(AQ->begin());
> + }
> + return SU;
> +}
> +
> diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
> new file mode 100644
> index 0000000..d74ff1e
> --- /dev/null
> +++ b/lib/Target/R600/R600MachineScheduler.h
> @@ -0,0 +1,121 @@
> +//===-- R600MachineScheduler.h - R600 Scheduler Interface -*- C++ -*-------===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief R600 Machine Scheduler interface
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef R600MACHINESCHEDULER_H_
> +#define R600MACHINESCHEDULER_H_
> +
> +#include "R600InstrInfo.h"
> +#include "llvm/CodeGen/MachineScheduler.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/ADT/PriorityQueue.h"
> +
> +using namespace llvm;
> +
> +namespace llvm {
> +
> +class CompareSUnit {
> +public:
> + bool operator()(const SUnit *S1, const SUnit *S2) {
> + return S1->getDepth() > S2->getDepth();
> + }
> +};
> +
> +class R600SchedStrategy : public MachineSchedStrategy {
> +
> + const ScheduleDAGMI *DAG;
> + const R600InstrInfo *TII;
> + const R600RegisterInfo *TRI;
> + MachineRegisterInfo *MRI;
> +
> + enum InstQueue {
> + QAlu = 1,
> + QFetch = 2,
> + QOther = 4
> + };
> +
> + enum InstKind {
> + IDAlu,
> + IDFetch,
> + IDOther,
> + IDLast
> + };
> +
> + enum AluKind {
> + AluAny,
> + AluT_X,
> + AluT_Y,
> + AluT_Z,
> + AluT_W,
> + AluT_XYZW,
> + AluDiscarded, // LLVM Instructions that are going to be eliminated
> + AluLast
> + };
> +
> + ReadyQueue *Available[IDLast], *Pending[IDLast];
> + std::multiset<SUnit *, CompareSUnit> AvailableAlus[AluLast];
> +
> + InstKind CurInstKind;
> + int CurEmitted;
> + InstKind NextInstKind;
> +
> + int InstKindLimit[IDLast];
> +
> + int OccupedSlotsMask;
> +
> +public:
> + R600SchedStrategy() :
> + DAG(0), TII(0), TRI(0), MRI(0) {
> + Available[IDAlu] = new ReadyQueue(QAlu, "AAlu");
> + Available[IDFetch] = new ReadyQueue(QFetch, "AFetch");
> + Available[IDOther] = new ReadyQueue(QOther, "AOther");
> + Pending[IDAlu] = new ReadyQueue(QAlu<<4, "PAlu");
> + Pending[IDFetch] = new ReadyQueue(QFetch<<4, "PFetch");
> + Pending[IDOther] = new ReadyQueue(QOther<<4, "POther");
> + }
> +
> + virtual ~R600SchedStrategy() {
> + for (unsigned I = 0; I < IDLast; ++I) {
> + delete Available[I];
> + delete Pending[I];
> + }
> + }
> +
> + virtual void initialize(ScheduleDAGMI *dag);
> + virtual SUnit *pickNode(bool &IsTopNode);
> + virtual void schedNode(SUnit *SU, bool IsTopNode);
> + virtual void releaseTopNode(SUnit *SU);
> + virtual void releaseBottomNode(SUnit *SU);
> +
> +private:
> + SUnit *InstructionsGroupCandidate[4];
> +
> + int getInstKind(SUnit *SU);
> + bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
> + AluKind getAluKind(SUnit *SU) const;
> + void LoadAlu();
> + bool isAvailablesAluEmpty() const;
> + SUnit *AttemptFillSlot (unsigned Slot);
> + void PrepareNextSlot();
> + SUnit *PopInst(std::multiset<SUnit *, CompareSUnit> &Q);
> +
> + void AssignSlot(MachineInstr *MI, unsigned Slot);
> + SUnit* pickAlu();
> + SUnit* pickOther(int QID);
> + bool isBundleable(const MachineInstr& MI);
> + void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
> +};
> +
> +} // namespace llvm
> +
> +#endif /* R600MACHINESCHEDULER_H_ */
> diff --git a/test/CodeGen/R600/fdiv.v4f32.ll b/test/CodeGen/R600/fdiv.v4f32.ll
> index 459fd11..79e677f 100644
> --- a/test/CodeGen/R600/fdiv.v4f32.ll
> +++ b/test/CodeGen/R600/fdiv.v4f32.ll
> @@ -1,13 +1,13 @@
> ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>
> ;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> ;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> ;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> ;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> ;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>
> define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
> %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
> --
> 1.8.1.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list