[Mesa-dev] [PATCH] [RFC] R600: rework handling of the constants
Vadim Girlin
vadimgirlin at gmail.com
Thu Dec 20 08:56:21 PST 2012
On Thu, 2012-12-20 at 07:32 -0800, Tom Stellard wrote:
> On Thu, Dec 20, 2012 at 04:47:50PM +0400, Vadim Girlin wrote:
> > Remove Cxxx registers, add new special register - "ALU_CONST" and new
> > operand for each alu src - "sel". ALU_CONST is used to designate that the
> > new operand contains the value to override src.sel, src.kc_bank, src.chan
> > for constants in the driver.
> > ---
>
> Overall this looks pretty good. It is a nice cleanup and it's good
> to get rid of all those Constant registers. I have a few comments below:
>
> > .../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 78 ++++++++++++----------
> > lib/Target/AMDGPU/R600Defines.h | 15 +++++
> > lib/Target/AMDGPU/R600ISelLowering.cpp | 13 ++--
> > lib/Target/AMDGPU/R600InstrInfo.cpp | 18 ++---
> > lib/Target/AMDGPU/R600Instructions.td | 14 ++--
> > lib/Target/AMDGPU/R600RegisterInfo.cpp | 6 +-
> > lib/Target/AMDGPU/R600RegisterInfo.td | 15 +----
> > 7 files changed, 80 insertions(+), 79 deletions(-)
> >
> > diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
> > index dc91924..e357598 100644
> > --- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
> > +++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
> > @@ -64,8 +64,8 @@ private:
> > void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
> > raw_ostream &OS) const;
> > void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
> > - void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
> > - raw_ostream &OS) const;
> > + void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
> > + raw_ostream &OS) const;
> > void EmitDst(const MCInst &MI, raw_ostream &OS) const;
> > void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
> > raw_ostream &OS) const;
> > @@ -194,7 +194,6 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
> > SmallVectorImpl<MCFixup> &Fixups,
> > raw_ostream &OS) const {
> > const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
> > - unsigned NumOperands = MI.getNumOperands();
> >
> > // Emit instruction type
> > EmitByte(INSTR_ALU, OS);
> > @@ -210,19 +209,21 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
> > InstWord01 |= ISAOpCode << 1;
> > }
> >
> > - unsigned SrcIdx = 0;
> > - for (unsigned int OpIdx = 1; OpIdx < NumOperands; ++OpIdx) {
> > - if (MI.getOperand(OpIdx).isImm() || MI.getOperand(OpIdx).isFPImm() ||
> > - OpIdx == (unsigned)MCDesc.findFirstPredOperandIdx()) {
> > - continue;
> > - }
> > - EmitSrcISA(MI, OpIdx, InstWord01, OS);
> > - SrcIdx++;
> > - }
> > + unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
> > + MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
> >
> > - // Emit zeros for unused sources
> > - for ( ; SrcIdx < 3; SrcIdx++) {
> > - EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
> > + EmitByte(SrcNum, OS);
> > +
> > + const unsigned SrcOps[3][2] = {
> > + {R600Operands::SRC0, R600Operands::SRC0_SEL},
> > + {R600Operands::SRC1, R600Operands::SRC1_SEL},
> > + {R600Operands::SRC2, R600Operands::SRC2_SEL}
> > + };
> > +
> > + for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
> > + unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
> > + unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
> > + EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
> > }
> >
> > Emit(InstWord01, OS);
> > @@ -293,34 +294,37 @@ void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
> >
> > }
> >
> > -void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
> > - uint64_t &Value, raw_ostream &OS) const {
> > - const MCOperand &MO = MI.getOperand(OpIdx);
> > +void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
> > + unsigned SelOpIdx, raw_ostream &OS) const {
> > + const MCOperand &RegMO = MI.getOperand(RegOpIdx);
> > + const MCOperand &SelMO = MI.getOperand(SelOpIdx);
> > +
> > union {
> > float f;
> > uint32_t i;
> > } InlineConstant;
> > InlineConstant.i = 0;
> > - // Emit the source select (2 bytes). For GPRs, this is the register index.
> > - // For other potential instruction operands, (e.g. constant registers) the
> > - // value of the source select is defined in the r600isa docs.
> > - if (MO.isReg()) {
> > - unsigned Reg = MO.getReg();
> > - if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
> > - EmitByte(1, OS);
> > - } else {
> > - EmitByte(0, OS);
> > - }
> > + // Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0
> > + // and select is 0 (GPR index is encoded in the instr encoding. For constants
> > + // type is 1 and select is the original const select passed from the driver.
> > + unsigned Reg = RegMO.getReg();
> > + if (Reg == AMDGPU::ALU_CONST) {
> > + EmitByte(1, OS);
> > + uint32_t Sel = SelMO.getImm();
> > + Emit(Sel, OS);
> > + } else {
> > + EmitByte(0, OS);
> > + Emit((uint32_t)0, OS);
> > + }
> >
>
> If possible, we should patch the 64-bit instruction encoding (InstWord01)
> here rather than emit a special byte. This will simplify the code in r600g
> and bring us closer to emitting real ISA.
I agree, but it's impossible until we have handling of the kcache
sets for ALU clauses in the backend. The ALU instruction encoding has only
9 bits for SRC_SEL, but for now we need 12 bits to encode the constant index
and 4 bits for the const buffer index (kc_bank), so we have to use
additional data. It's a temporary solution: later, kc_bank and most of
the constant index bits will be passed in the corresponding fields of
the CF_ALU_xxx encodings, and then we'll be able to get rid of the
additional bytes here.
I'll try to fix/improve the things you mentioned in your other comments
for both patches.
Vadim
>
> > - if (Reg == AMDGPU::ALU_LITERAL_X) {
> > - unsigned ImmOpIndex = MI.getNumOperands() - 1;
> > - MCOperand ImmOp = MI.getOperand(ImmOpIndex);
> > - if (ImmOp.isFPImm()) {
> > - InlineConstant.f = ImmOp.getFPImm();
> > - } else {
> > - assert(ImmOp.isImm());
> > - InlineConstant.i = ImmOp.getImm();
> > - }
> > + if (Reg == AMDGPU::ALU_LITERAL_X) {
> > + unsigned ImmOpIndex = MI.getNumOperands() - 1;
> > + MCOperand ImmOp = MI.getOperand(ImmOpIndex);
> > + if (ImmOp.isFPImm()) {
> > + InlineConstant.f = ImmOp.getFPImm();
> > + } else {
> > + assert(ImmOp.isImm());
> > + InlineConstant.i = ImmOp.getImm();
> > }
> > }
> >
> > diff --git a/lib/Target/AMDGPU/R600Defines.h b/lib/Target/AMDGPU/R600Defines.h
> > index 7dea8e4..e19eea3 100644
> > --- a/lib/Target/AMDGPU/R600Defines.h
> > +++ b/lib/Target/AMDGPU/R600Defines.h
> > @@ -62,18 +62,33 @@ namespace R600Operands {
> > SRC0_NEG,
> > SRC0_REL,
> > SRC0_ABS,
> > + SRC0_SEL,
> > SRC1,
> > SRC1_NEG,
> > SRC1_REL,
> > SRC1_ABS,
> > + SRC1_SEL,
> > SRC2,
> > SRC2_NEG,
> > SRC2_REL,
> > + SRC2_SEL,
> > LAST,
> > PRED_SEL,
> > IMM,
> > COUNT
> > };
> > +
> > + const static int ALUOpTable[3][R600Operands::COUNT] = {
> > +// W C S S S S S S S S S S S
> > +// R O D L S R R R R S R R R R S R R R L P
> > +// D U I M R A R C C C C R C C C C R C C C A R I
> > +// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M
> > +// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M
> > + {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12},
> > + {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19},
> > + {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17}
> > + };
> > +
> > }
> >
> > #endif // R600DEFINES_H_
> > diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
> > index 8a5e194..b1ccfc4 100644
> > --- a/lib/Target/AMDGPU/R600ISelLowering.cpp
> > +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
> > @@ -16,6 +16,7 @@
> > #include "R600Defines.h"
> > #include "R600InstrInfo.h"
> > #include "R600MachineFunctionInfo.h"
> > +#include "AMDGPURegisterInfo.h"
>
> Coding style: The includes other than R600ISelLowering.h need to be sorted
> alphabetically.
>
> > #include "llvm/Argument.h"
> > #include "llvm/Function.h"
> > #include "llvm/CodeGen/MachineInstrBuilder.h"
> > @@ -115,11 +116,13 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
> > }
> >
> > case AMDGPU::R600_LOAD_CONST: {
> > - int64_t RegIndex = MI->getOperand(1).getImm();
> > - unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
> > - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
> > - .addOperand(MI->getOperand(0))
> > - .addReg(ConstantReg);
> > + unsigned ConstSel = MI->getOperand(1).getImm();
> > + MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
> > + AMDGPU::MOV,
> > + MI->getOperand(0).getReg(),
> > + AMDGPU::ALU_CONST);
> > + int SelIdx = TII->getOperandIdx(*NewMI, R600Operands::SRC0_SEL);
> > + NewMI->getOperand(SelIdx).setImm(ConstSel);
> > break;
> > }
>
> If you move this code into AMDGPUDAGToDAGISel::Select() you will be able
> to fold the constant "registers" into their uses and eliminate the extra
> MOV instruction. Just like what we do for immediates (ISD::Constant).
>
> >
> > diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
> > index a60a180..6c1c50a 100644
> > --- a/lib/Target/AMDGPU/R600InstrInfo.cpp
> > +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
> > @@ -484,13 +484,15 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB
> > .addReg(Src0Reg) // $src0
> > .addImm(0) // $src0_neg
> > .addImm(0) // $src0_rel
> > - .addImm(0); // $src0_abs
> > + .addImm(0) // $src0_abs
> > + .addImm(0); // $src0_sel
> >
> > if (Src1Reg) {
> > MIB.addReg(Src1Reg) // $src1
> > .addImm(0) // $src1_neg
> > .addImm(0) // $src1_rel
> > - .addImm(0); // $src1_abs
> > + .addImm(0) // $src1_abs
> > + .addImm(0); // $src1_sel
> > }
> >
> > //XXX: The r600g finalizer expects this to be 1, once we've moved the
> > @@ -519,16 +521,6 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
> >
> > int R600InstrInfo::getOperandIdx(unsigned Opcode,
> > R600Operands::Ops Op) const {
> > - const static int OpTable[3][R600Operands::COUNT] = {
> > -// W C S S S S S S S S
> > -// R O D L S R R R S R R R S R R L P
> > -// D U I M R A R C C C C C C C R C C A R I
> > -// S E U T O E M C 0 0 0 C 1 1 1 C 2 2 S E M
> > -// T M P E D L P 0 N R A 1 N R A 2 N R T D M
> > - {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8,-1,-1,-1,-1,-1,-1,-1, 9,10,11},
> > - {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17},
> > - {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14}
> > - };
> > unsigned TargetFlags = get(Opcode).TSFlags;
> > unsigned OpTableIdx;
> >
> > @@ -554,7 +546,7 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode,
> > OpTableIdx = 2;
> > }
> >
> > - return OpTable[OpTableIdx][Op];
> > + return R600Operands::ALUOpTable[OpTableIdx][Op];
> > }
> >
> > void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
> > diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
> > index d7659d9..9d738ea 100644
> > --- a/lib/Target/AMDGPU/R600Instructions.td
> > +++ b/lib/Target/AMDGPU/R600Instructions.td
> > @@ -70,6 +70,8 @@ class InstFlag<string PM = "printOperand", int Default = 0>
> > let PrintMethod = PM;
> > }
> >
> > +def SEL : OperandWithDefaultOps <i32, (ops (i32 0))>;
> > +
> > def LITERAL : InstFlag<"printLiteral">;
> >
> > def WRITE : InstFlag <"printWrite", 1>;
> > @@ -214,7 +216,7 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
> > InstR600 <0,
> > (outs R600_Reg32:$dst),
> > (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
> > - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
> > + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
> > LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
> > !strconcat(opName,
> > "$clamp $dst$write$dst_rel$omod, "
> > @@ -254,8 +256,8 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
> > (outs R600_Reg32:$dst),
> > (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
> > OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
> > - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
> > - R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs,
> > + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
> > + R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
> > LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
> > !strconcat(opName,
> > "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
> > @@ -291,9 +293,9 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
> > InstR600 <0,
> > (outs R600_Reg32:$dst),
> > (ins REL:$dst_rel, CLAMP:$clamp,
> > - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel,
> > - R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel,
> > - R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel,
> > + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
> > + R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
> > + R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
> > LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
> > !strconcat(opName, "$clamp $dst$dst_rel, "
> > "$src0_neg$src0$src0_rel, "
>
> We should try to add the new select fields to the assembly output.
> Maybe something like CONST[bank, sel]. This is just an example, feel free to
> come up with whatever assembly string you think makes the most sense.
>
> > diff --git a/lib/Target/AMDGPU/R600RegisterInfo.cpp b/lib/Target/AMDGPU/R600RegisterInfo.cpp
> > index a39f83d..0441e4a 100644
> > --- a/lib/Target/AMDGPU/R600RegisterInfo.cpp
> > +++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp
> > @@ -38,16 +38,12 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
> > Reserved.set(AMDGPU::NEG_ONE);
> > Reserved.set(AMDGPU::PV_X);
> > Reserved.set(AMDGPU::ALU_LITERAL_X);
> > + Reserved.set(AMDGPU::ALU_CONST);
> > Reserved.set(AMDGPU::PREDICATE_BIT);
> > Reserved.set(AMDGPU::PRED_SEL_OFF);
> > Reserved.set(AMDGPU::PRED_SEL_ZERO);
> > Reserved.set(AMDGPU::PRED_SEL_ONE);
> >
> > - for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
> > - E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
> > - Reserved.set(*I);
> > - }
> > -
> > for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
> > E = MFI->ReservedRegs.end(); I != E; ++I) {
> > Reserved.set(*I);
> > diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td b/lib/Target/AMDGPU/R600RegisterInfo.td
> > index d3d6d25..62579ab 100644
> > --- a/lib/Target/AMDGPU/R600RegisterInfo.td
> > +++ b/lib/Target/AMDGPU/R600RegisterInfo.td
> > @@ -27,10 +27,6 @@ foreach Index = 0-127 in {
> > foreach Chan = [ "X", "Y", "Z", "W" ] in {
> > // 32-bit Temporary Registers
> > def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
> > -
> > - // 32-bit Constant Registers (There are more than 128, this the number
> > - // that is currently supported.
> > - def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>;
> > }
> > // 128-bit Temporary Registers
> > def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
> > @@ -61,17 +57,11 @@ def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
> > def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
> > def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
> > def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
> > +def ALU_CONST : R600Reg<"Const", 0>;
> >
> > def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
> > (add (sequence "ArrayBase%u", 448, 464))>;
> >
> > -def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
> > - (add (interleave
> > - (interleave (sequence "C%u_X", 0, 127),
> > - (sequence "C%u_Z", 0, 127)),
> > - (interleave (sequence "C%u_Y", 0, 127),
> > - (sequence "C%u_W", 0, 127))))>;
> > -
> > def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
> > (add (sequence "T%u_X", 0, 127))>;
> >
> > @@ -91,9 +81,8 @@ def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
> >
> > def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
> > R600_TReg32,
> > - R600_CReg32,
> > R600_ArrayBase,
> > - ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
> > + ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, ALU_CONST, NEG_ONE, NEG_HALF)>;
> >
> > def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
> > PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
> > --
> > 1.8.0.2
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list