[Mesa-dev] [PATCH] R600: Don't use trans slot for instructions that read LDS source registers
Vincent Lejeune
vljn at ovi.com
Sun Sep 8 10:39:53 PDT 2013
A few comments below, otherwise :
reviewed-by: Vincent Lejeune<vljn at ovi.com>
----- Mail original -----
> De : Tom Stellard <tom at stellard.net>
> À : llvm-commits at cs.uiuc.edu
> Cc : mesa-dev at lists.freedesktop.org; Tom Stellard <thomas.stellard at amd.com>
> Envoyé le : Vendredi 6 septembre 2013 0h23
> Objet : [PATCH] R600: Don't use trans slot for instructions that read LDS source registers
>
> From: Tom Stellard <thomas.stellard at amd.com>
>
> This fixes some regressions in the piglit local memory store tests
> introduced by recent commits which made the scheduler aware of the trans
> slot.
>
> It's not possible to test this using lit, because there is no way to
> determine from the assembly dumps whether or not an instruction is in
> the trans slot.
>
> Even if this were possible, the test would be highly sensitive to
> changes in the scheduler and might generate confusing false negatives.
> ---
> lib/Target/R600/R600InstrInfo.cpp | 17 +++++++++++++++++
> lib/Target/R600/R600InstrInfo.h | 1 +
> lib/Target/R600/R600MachineScheduler.cpp | 5 +++++
> lib/Target/R600/R600Packetizer.cpp | 5 +++++
> lib/Target/R600/R600RegisterInfo.td | 10 +++++++++-
> 5 files changed, 37 insertions(+), 1 deletion(-)
>
> diff --git a/lib/Target/R600/R600InstrInfo.cpp
> b/lib/Target/R600/R600InstrInfo.cpp
> index 0e7cfb4..60a3f7d 100644
> --- a/lib/Target/R600/R600InstrInfo.cpp
> +++ b/lib/Target/R600/R600InstrInfo.cpp
> @@ -204,6 +204,23 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode)
> const {
> }
> }
>
> +bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
> + if (!isALUInstr(MI->getOpcode())) {
> + return false;
> + }
> + for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
> + E = MI->operands_end(); I != E; ++I)
> {
> + if (!I->isReg() || !I->isUse() ||
> + TargetRegisterInfo::isVirtualRegister(I->getReg())) {
> + continue;
> + }
> + if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg())) {
> + return true;
> + }
The bracket in this if statements and in the previous one are unneeded.
> + }
> + return false;
> +}
> +
> int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
> static const unsigned OpTable[] = {
> AMDGPU::OpName::src0,
> diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
> index 24cc43d..0d1ffc8 100644
> --- a/lib/Target/R600/R600InstrInfo.h
> +++ b/lib/Target/R600/R600InstrInfo.h
> @@ -78,6 +78,7 @@ namespace llvm {
> bool usesTextureCache(const MachineInstr *MI) const;
>
> bool mustBeLastInClause(unsigned Opcode) const;
> + bool readsLDSSrcReg(const MachineInstr *MI) const;
>
> /// \returns The operand index for the given source number. Legal values
> /// for SrcNum are 0, 1, and 2.
> diff --git a/lib/Target/R600/R600MachineScheduler.cpp
> b/lib/Target/R600/R600MachineScheduler.cpp
> index 0499dd5..f67ba89 100644
> --- a/lib/Target/R600/R600MachineScheduler.cpp
> +++ b/lib/Target/R600/R600MachineScheduler.cpp
> @@ -314,6 +314,11 @@ R600SchedStrategy::AluKind
> R600SchedStrategy::getAluKind(SUnit *SU) const {
> if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
> return AluT_XYZW;
>
> + // LDS src registers cannot be used in the Trans slot.
> + if (TII->readsLDSSrcReg(MI)) {
> + return AluT_XYZW;
> + }
Here too
> +
> return AluAny;
>
> }
> diff --git a/lib/Target/R600/R600Packetizer.cpp
> b/lib/Target/R600/R600Packetizer.cpp
> index 6c70052..ee256d5 100644
> --- a/lib/Target/R600/R600Packetizer.cpp
> +++ b/lib/Target/R600/R600Packetizer.cpp
> @@ -272,6 +272,11 @@ public:
> return false;
> }
>
> + // We cannot read LDS source registrs from the Trans slot.
> + if (isTransSlot && TII->readsLDSSrcReg(MI)) {
> + return false;
> + }
And here too
> +
> CurrentPacketMIs.pop_back();
> return true;
> }
> diff --git a/lib/Target/R600/R600RegisterInfo.td
> b/lib/Target/R600/R600RegisterInfo.td
> index fa987cf..514427e 100644
> --- a/lib/Target/R600/R600RegisterInfo.td
> +++ b/lib/Target/R600/R600RegisterInfo.td
> @@ -95,6 +95,12 @@ foreach Index = 448-480 in {
>
> // Special Registers
>
> +def OQA : R600Reg<"OQA", 219>;
> +def OQB : R600Reg<"OQB", 220>;
> +def OQAP : R600Reg<"OQAP", 221>;
> +def OQBP : R600Reg<"OQAP", 222>;
> +def LDS_DIRECT_A : R600Reg<"LDS_DIRECT_A", 223>;
> +def LDS_DIRECT_B : R600Reg<"LDS_DIRECT_B", 224>;
> def ZERO : R600Reg<"0.0", 248>;
> def ONE : R600Reg<"1.0", 249>;
> def NEG_ONE : R600Reg<"-1.0", 249>;
> @@ -115,7 +121,6 @@ def PRED_SEL_OFF: R600Reg<"Pred_sel_off",
> 0>;
> def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
> def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
> def AR_X : R600Reg<"AR.x", 0>;
> -def OQAP : R600Reg<"OQAP", 221>;
>
> def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
> (add (sequence "ArrayBase%u", 448,
> 480))>;
> @@ -130,6 +135,9 @@ let isAllocatable = 0 in {
> // XXX: Only use the X channel, until we support wider stack widths
> def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence
> "Addr%u_X", 0, 127))>;
>
> +def R600_LDS_SRC_REG : RegisterClass<"AMDGPU", [i32], 32,
> + (add OQA, OQB, OQAP, OQBP, LDS_DIRECT_A, LDS_DIRECT_B)>;
> +
> } // End isAllocatable = 0
>
> def R600_KC0_X : RegisterClass <"AMDGPU", [f32, i32], 32,
> --
> 1.7.11.4
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
More information about the mesa-dev
mailing list