[Mesa-dev] [PATCH] R600: Factorize code handling Const Read Port limitation
Vincent Lejeune
vljn at ovi.com
Wed Mar 13 14:59:07 PDT 2013
I fixed the coding style issue.
The <iostream> include was a debug leftover line, it shouldn't be there.
----- Mail original -----
> De : Tom Stellard <tom at stellard.net>
> À : Vincent Lejeune <vljn at ovi.com>
> Cc : llvm-commits at cs.uiuc.edu; mesa-dev at lists.freedesktop.org
> Envoyé le : Mercredi 13 mars 2013 21h49
> Objet : Re: [PATCH] R600: Factorize code handling Const Read Port limitation
>
> On Wed, Mar 13, 2013 at 09:12:41PM +0100, Vincent Lejeune wrote:
>> ---
>> lib/Target/R600/AMDILISelDAGToDAG.cpp | 34 ++++++++++----
>> lib/Target/R600/R600InstrInfo.cpp | 54 ++++++++++++++++++++++
>> lib/Target/R600/R600InstrInfo.h | 3 ++
>> lib/Target/R600/R600MachineScheduler.cpp | 77
> ++++----------------------------
>> lib/Target/R600/R600MachineScheduler.h | 3 +-
>> test/CodeGen/R600/kcache-fold-2.ll | 52 +++++++++++++++++++++
>> 6 files changed, 144 insertions(+), 79 deletions(-)
>> create mode 100644 test/CodeGen/R600/kcache-fold-2.ll
>>
>> diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp
> b/lib/Target/R600/AMDILISelDAGToDAG.cpp
>> index 0c7880d..05a1ea7 100644
>> --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
>> +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
>> @@ -336,6 +336,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
>> return Result;
>> }
>>
>> +
>
> Whitespace
>> bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
>> const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
>> int OperandIdx[] = {
>> @@ -365,17 +366,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned
> Opcode,
>> SDValue Operand = Ops[OperandIdx[i] - 1];
>> switch (Operand.getOpcode()) {
>> case AMDGPUISD::CONST_ADDRESS: {
>> - if (i == 2)
>> - break;
>> SDValue CstOffset;
>> - if (!Operand.getValueType().isVector() &&
>> - SelectGlobalValueConstantOffset(Operand.getOperand(0),
> CstOffset)) {
>> - Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST,
> MVT::f32);
>> - Ops[SelIdx[i] - 1] = CstOffset;
>> - return true;
>> + if (Operand.getValueType().isVector() ||
>> + !SelectGlobalValueConstantOffset(Operand.getOperand(0),
> CstOffset))
>> + break;
>> +
>> + // Gather others constants values
>> + std::vector<unsigned> Consts;
>> + for (unsigned j = 0; j < 3; j++) {
>> + int SrcIdx = OperandIdx[j];
>> + if (SrcIdx < 0)
>> + break;
>> + if (RegisterSDNode *Reg =
> dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
>> + if (Reg->getReg() == AMDGPU::ALU_CONST) {
>> + ConstantSDNode *Cst =
> dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
>> + Consts.push_back(Cst->getZExtValue());
>> + }
>> + }
>> }
>> +
>> + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
>> + Consts.push_back(Cst->getZExtValue());
>> + if (!TII->fitsConstReadLimitations(Consts))
>> + break;
>> +
>> + Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST,
> MVT::f32);
>> + Ops[SelIdx[i] - 1] = CstOffset;
>> + return true;
>> }
>> - break;
>> case ISD::FNEG:
>> if (NegIdx[i] < 0)
>> break;
>> diff --git a/lib/Target/R600/R600InstrInfo.cpp
> b/lib/Target/R600/R600InstrInfo.cpp
>> index be3318a..0865098 100644
>> --- a/lib/Target/R600/R600InstrInfo.cpp
>> +++ b/lib/Target/R600/R600InstrInfo.cpp
>> @@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const
> {
>> (TargetFlags & R600_InstFlag::OP3));
>> }
>>
>> +bool
>> +R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned>
> &Consts)
>> + const {
>> + assert (Consts.size() <= 12 && "Too many operands in
> instructions group");
>> + unsigned Pair1 = 0, Pair2 = 0;
>> + for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
>> + unsigned ReadConstHalf = Consts[i] & 2;
>> + unsigned ReadConstIndex = Consts[i] & (~3);
>> + unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
>> + if (!Pair1) {
>> + Pair1 = ReadHalfConst;
>> + continue;
>> + }
>> + if (Pair1 == ReadHalfConst)
>> + continue;
>> + if (!Pair2) {
>> + Pair2 = ReadHalfConst;
>> + continue;
>> + }
>> + if (Pair2 != ReadHalfConst)
>> + return false;
>> + }
>> + return true;
>> +}
>> +
>> +bool
>> +R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs)
> const {
>> + std::vector<unsigned> Consts;
>> + for (unsigned i = 0, n = MIs.size(); i < n; i++) {
>> + const MachineInstr *MI = MIs[i];
>> +
>> + const R600Operands::Ops OpTable[3][2] = {
>> + {R600Operands::SRC0, R600Operands::SRC0_SEL},
>> + {R600Operands::SRC1, R600Operands::SRC1_SEL},
>> + {R600Operands::SRC2, R600Operands::SRC2_SEL},
>> + };
>> +
>> + if (!isALUInstr(MI->getOpcode()))
>> + continue;
>> +
>> + for (unsigned j = 0; j < 3; j++) {
>> + int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
>> + if (SrcIdx < 0)
>> + break;
>> + if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
>> + unsigned Const = MI->getOperand(
>> + getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
>> + Consts.push_back(Const);
>> + }
>> + }
>> + }
>> + return fitsConstReadLimitations(Consts);
>> +}
>> +
>> DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const
> TargetMachine *TM,
>> const ScheduleDAG *DAG) const {
>> const InstrItineraryData *II = TM->getInstrItineraryData();
>> diff --git a/lib/Target/R600/R600InstrInfo.h
> b/lib/Target/R600/R600InstrInfo.h
>> index efe721c..bf9569e 100644
>> --- a/lib/Target/R600/R600InstrInfo.h
>> +++ b/lib/Target/R600/R600InstrInfo.h
>> @@ -53,6 +53,9 @@ namespace llvm {
>> /// \returns true if this \p Opcode represents an ALU
> instruction.
>> bool isALUInstr(unsigned Opcode) const;
>>
>> + bool fitsConstReadLimitations(const std::vector<unsigned>&)
> const;
>> + bool canBundle(const std::vector<MachineInstr *> &) const;
>> +
>> /// \breif Vector instructions are instructions that must fill all
>> /// instruction slots within an instruction group.
>> bool isVector(const MachineInstr &MI) const;
>> diff --git a/lib/Target/R600/R600MachineScheduler.cpp
> b/lib/Target/R600/R600MachineScheduler.cpp
>> index ee8d672..b4d9779 100644
>> --- a/lib/Target/R600/R600MachineScheduler.cpp
>> +++ b/lib/Target/R600/R600MachineScheduler.cpp
>> @@ -24,7 +24,7 @@
>> #include "llvm/PassManager.h"
>> #include "llvm/Support/raw_ostream.h"
>> #include <set>
>> -
>> +#include <iostream>
>
> You should double check the LLVM coding style guide, but I think system
> headers need to be sorted.
>
>> using namespace llvm;
>>
>> static
>> @@ -222,7 +222,6 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag)
> {
>> CurInstKind = IDOther;
>> CurEmitted = 0;
>> OccupedSlotsMask = 15;
>> - memset(InstructionsGroupCandidate, 0,
> sizeof(InstructionsGroupCandidate));
>> InstKindLimit[IDAlu] = 120; // 120 minus 8 for security
>> InstKindLimit[IDOther] = 32;
>>
>> @@ -503,79 +502,19 @@ int R600SchedStrategy::getInstKind(SUnit* SU) {
>> }
>> }
>>
>> -class ConstPairs {
>> -private:
>> - unsigned XYPair;
>> - unsigned ZWPair;
>> -public:
>> - ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) {
>> - for (unsigned i = 0; i < 3; i++) {
>> - unsigned ReadConstChan = ReadConst[i] & 3;
>> - unsigned ReadConstIndex = ReadConst[i] & (~3);
>> - if (ReadConstChan < 2) {
>> - if (!XYPair) {
>> - XYPair = ReadConstIndex;
>> - }
>> - } else {
>> - if (!ZWPair) {
>> - ZWPair = ReadConstIndex;
>> - }
>> - }
>> - }
>> - }
>> -
>> - bool isCompatibleWith(const ConstPairs& CP) const {
>> - return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) &&
>> - (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair);
>> - }
>> -};
>> -
>> -static
>> -const ConstPairs getPairs(const R600InstrInfo *TII, const
> MachineInstr& MI) {
>> - unsigned ReadConsts[3] = {0, 0, 0};
>> - R600Operands::Ops OpTable[3][2] = {
>> - {R600Operands::SRC0, R600Operands::SRC0_SEL},
>> - {R600Operands::SRC1, R600Operands::SRC1_SEL},
>> - {R600Operands::SRC2, R600Operands::SRC2_SEL},
>> - };
>> -
>> - if (!TII->isALUInstr(MI.getOpcode()))
>> - return ConstPairs(ReadConsts);
>> -
>> - for (unsigned i = 0; i < 3; i++) {
>> - int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]);
>> - if (SrcIdx < 0)
>> - break;
>> - if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST)
>> - ReadConsts[i] =MI.getOperand(
>> - TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm();
>> - }
>> - return ConstPairs(ReadConsts);
>> -}
>> -
>> -bool
>> -R600SchedStrategy::isBundleable(const MachineInstr& MI) {
>> - const ConstPairs &MIPair = getPairs(TII, MI);
>> - for (unsigned i = 0; i < 4; i++) {
>> - if (!InstructionsGroupCandidate[i])
>> - continue;
>> - const ConstPairs &IGPair = getPairs(TII,
>> - *InstructionsGroupCandidate[i]->getInstr());
>> - if (!IGPair.isCompatibleWith(MIPair))
>> - return false;
>> - }
>> - return true;
>> -}
>> -
>> SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
>> if (Q.empty())
>> return NULL;
>> for (std::vector<SUnit *>::iterator It = Q.begin(), E = Q.end();
>> It != E; ++It) {
>> SUnit *SU = *It;
>> - if (isBundleable(*SU->getInstr())) {
>> + InstructionsGroupCandidate.push_back(SU->getInstr());
>> + if (TII->canBundle(InstructionsGroupCandidate)) {
>> + InstructionsGroupCandidate.pop_back();
>> Q.erase(It);
>> return SU;
>> + } else {
>> + InstructionsGroupCandidate.pop_back();
>> }
>> }
>> return NULL;
>> @@ -596,7 +535,7 @@ void R600SchedStrategy::PrepareNextSlot() {
>> DEBUG(dbgs() << "New Slot\n");
>> assert (OccupedSlotsMask && "Slot wasn't filled");
>> OccupedSlotsMask = 0;
>> - memset(InstructionsGroupCandidate, 0,
> sizeof(InstructionsGroupCandidate));
>> + InstructionsGroupCandidate.clear();
>> LoadAlu();
>> }
>>
>> @@ -666,7 +605,7 @@ SUnit* R600SchedStrategy::pickAlu() {
>> SUnit *SU = AttemptFillSlot(Chan);
>> if (SU) {
>> OccupedSlotsMask |= (1 << Chan);
>> - InstructionsGroupCandidate[Chan] = SU;
>> + InstructionsGroupCandidate.push_back(SU->getInstr());
>> return SU;
>> }
>> }
>> diff --git a/lib/Target/R600/R600MachineScheduler.h
> b/lib/Target/R600/R600MachineScheduler.h
>> index 4b1f6c3..47b1d56 100644
>> --- a/lib/Target/R600/R600MachineScheduler.h
>> +++ b/lib/Target/R600/R600MachineScheduler.h
>> @@ -100,7 +100,7 @@ public:
>> virtual void releaseBottomNode(SUnit *SU);
>>
>> private:
>> - SUnit *InstructionsGroupCandidate[4];
>> + std::vector<MachineInstr *> InstructionsGroupCandidate;
>>
>> int getInstKind(SUnit *SU);
>> bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC)
> const;
>> @@ -114,7 +114,6 @@ private:
>> void AssignSlot(MachineInstr *MI, unsigned Slot);
>> SUnit* pickAlu();
>> SUnit* pickOther(int QID);
>> - bool isBundleable(const MachineInstr& MI);
>> void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
>> };
>>
>> diff --git a/test/CodeGen/R600/kcache-fold-2.ll
> b/test/CodeGen/R600/kcache-fold-2.ll
>> new file mode 100644
>> index 0000000..77d0052
>> --- /dev/null
>> +++ b/test/CodeGen/R600/kcache-fold-2.ll
>
> For performance reasons (I think), it's prefered to stick similar tests
> into the same file, so this test should be moved to kcache-fold.ll.
> You will also need to add a CHECK: line for each function name in the
> new file. Take a look at some of the other tests to see how this is done.
>
> -Tom
>
>> @@ -0,0 +1,52 @@
>> +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>> +
>> +; CHECK-NOT: MOV
>> +
>> +define void @main() {
>> +main_body:
>> + %0 = load <4 x float> addrspace(8)* null
>> + %1 = extractelement <4 x float> %0, i32 0
>> + %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x
> float>] addrspace(8)* null, i64 0, i32 1)
>> + %3 = extractelement <4 x float> %2, i32 0
>> + %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x
> float>] addrspace(8)* null, i64 0, i32 1)
>> + %5 = extractelement <4 x float> %4, i32 1
>> + %6 = fcmp ult float %1, 0.000000e+00
>> + %7 = select i1 %6, float %3, float %5
>> + %8 = load <4 x float> addrspace(8)* null
>> + %9 = extractelement <4 x float> %8, i32 1
>> + %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4
> x float>] addrspace(8)* null, i64 0, i32 2)
>> + %11 = extractelement <4 x float> %10, i32 0
>> + %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4
> x float>] addrspace(8)* null, i64 0, i32 2)
>> + %13 = extractelement <4 x float> %12, i32 1
>> + %14 = fcmp ult float %9, 0.000000e+00
>> + %15 = select i1 %14, float %11, float %13
>> + %16 = load <4 x float> addrspace(8)* null
>> + %17 = extractelement <4 x float> %16, i32 2
>> + %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4
> x float>] addrspace(8)* null, i64 0, i32 1)
>> + %19 = extractelement <4 x float> %18, i32 3
>> + %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4
> x float>] addrspace(8)* null, i64 0, i32 1)
>> + %21 = extractelement <4 x float> %20, i32 2
>> + %22 = fcmp ult float %17, 0.000000e+00
>> + %23 = select i1 %22, float %19, float %21
>> + %24 = load <4 x float> addrspace(8)* null
>> + %25 = extractelement <4 x float> %24, i32 3
>> + %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4
> x float>] addrspace(8)* null, i64 0, i32 2)
>> + %27 = extractelement <4 x float> %26, i32 3
>> + %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4
> x float>] addrspace(8)* null, i64 0, i32 2)
>> + %29 = extractelement <4 x float> %28, i32 2
>> + %30 = fcmp ult float %25, 0.000000e+00
>> + %31 = select i1 %30, float %27, float %29
>> + %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float
> 1.000000e+00)
>> + %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float
> 1.000000e+00)
>> + %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float
> 1.000000e+00)
>> + %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float
> 1.000000e+00)
>> + %36 = insertelement <4 x float> undef, float %32, i32 0
>> + %37 = insertelement <4 x float> %36, float %33, i32 1
>> + %38 = insertelement <4 x float> %37, float %34, i32 2
>> + %39 = insertelement <4 x float> %38, float %35, i32 3
>> + call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
>> + ret void
>> +}
>> +
>> +declare float @llvm.AMDIL.clamp.(float, float, float) readnone
>> +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
>> --
>> 1.8.1.4
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-R600-Factorize-code-handling-Const-Read-Port-limitat.patch
Type: application/octet-stream
Size: 12979 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130313/e0596999/attachment-0001.obj>
More information about the mesa-dev
mailing list