[Mesa-dev] [PATCH 1/4] R600: Consolidate sub register indices.
Michel Dänzer
michel at daenzer.net
Fri Feb 1 07:39:43 PST 2013
From: Michel Dänzer <michel.daenzer at amd.com>
Use sub0-15 everywhere, replacing the separate sel_x/sel_y/sel_z/sel_w, indirect_0-15 and low/high sub register indices.
Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
---
lib/Target/R600/AMDGPUInstructions.td | 4 +--
lib/Target/R600/AMDGPURegisterInfo.cpp | 32 ++++++++++-----------
lib/Target/R600/AMDGPURegisterInfo.td | 7 +----
lib/Target/R600/R600Instructions.td | 36 ++++++++++++------------
lib/Target/R600/R600RegisterInfo.cpp | 8 +++---
lib/Target/R600/R600RegisterInfo.td | 7 ++---
lib/Target/R600/SIInstructions.td | 48 ++++++++++++++++----------------
lib/Target/R600/SIRegisterInfo.td | 24 ++++------------
8 files changed, 73 insertions(+), 93 deletions(-)
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 3dee004..6765bc8 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -196,8 +196,8 @@ class Vector_Build <ValueType vecType, RegisterClass vectorClass,
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
(elemType elemClass:$z), (elemType elemClass:$w))),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
- elemClass:$z, sel_z), elemClass:$w, sel_w)
+ (vecType (IMPLICIT_DEF)), elemClass:$x, sub0), elemClass:$y, sub1),
+ elemClass:$z, sub2), elemClass:$w, sub3)
>;
// bitconvert pattern
diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp
index ea0efd5..d62e57b 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.cpp
+++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -50,22 +50,22 @@ unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
switch(IndirectIndex) {
- case 0: return AMDGPU::indirect_0;
- case 1: return AMDGPU::indirect_1;
- case 2: return AMDGPU::indirect_2;
- case 3: return AMDGPU::indirect_3;
- case 4: return AMDGPU::indirect_4;
- case 5: return AMDGPU::indirect_5;
- case 6: return AMDGPU::indirect_6;
- case 7: return AMDGPU::indirect_7;
- case 8: return AMDGPU::indirect_8;
- case 9: return AMDGPU::indirect_9;
- case 10: return AMDGPU::indirect_10;
- case 11: return AMDGPU::indirect_11;
- case 12: return AMDGPU::indirect_12;
- case 13: return AMDGPU::indirect_13;
- case 14: return AMDGPU::indirect_14;
- case 15: return AMDGPU::indirect_15;
+ case 0: return AMDGPU::sub0;
+ case 1: return AMDGPU::sub1;
+ case 2: return AMDGPU::sub2;
+ case 3: return AMDGPU::sub3;
+ case 4: return AMDGPU::sub4;
+ case 5: return AMDGPU::sub5;
+ case 6: return AMDGPU::sub6;
+ case 7: return AMDGPU::sub7;
+ case 8: return AMDGPU::sub8;
+ case 9: return AMDGPU::sub9;
+ case 10: return AMDGPU::sub10;
+ case 11: return AMDGPU::sub11;
+ case 12: return AMDGPU::sub12;
+ case 13: return AMDGPU::sub13;
+ case 14: return AMDGPU::sub14;
+ case 15: return AMDGPU::sub15;
default: llvm_unreachable("indirect index out of range");
}
}
diff --git a/lib/Target/R600/AMDGPURegisterInfo.td b/lib/Target/R600/AMDGPURegisterInfo.td
index 0b4482c..b5aca03 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.td
+++ b/lib/Target/R600/AMDGPURegisterInfo.td
@@ -12,14 +12,9 @@
//===----------------------------------------------------------------------===//
let Namespace = "AMDGPU" in {
- def sel_x : SubRegIndex;
- def sel_y : SubRegIndex;
- def sel_z : SubRegIndex;
- def sel_w : SubRegIndex;
-
foreach Index = 0-15 in {
- def indirect_#Index : SubRegIndex;
+ def sub#Index : SubRegIndex;
}
def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 1080806..f3513bb 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -578,13 +578,13 @@ class ExportBufWord1 {
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
(ExportInst
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
0, 61, 0, 7, 7, 7, cf_inst, 0)
>;
def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
(ExportInst
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
0, 61, 7, 0, 7, 7, cf_inst, 0)
>;
@@ -1815,25 +1815,25 @@ def : Pat <
(SNE R600_Reg32:$src0, R600_Reg32:$src1)
>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp
index cd3fc4a..33e858d 100644
--- a/lib/Target/R600/R600RegisterInfo.cpp
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -84,10 +84,10 @@ const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const {
switch (Channel) {
default: assert(!"Invalid channel index"); return 0;
- case 0: return AMDGPU::sel_x;
- case 1: return AMDGPU::sel_y;
- case 2: return AMDGPU::sel_z;
- case 3: return AMDGPU::sel_w;
+ case 0: return AMDGPU::sub0;
+ case 1: return AMDGPU::sub1;
+ case 2: return AMDGPU::sub2;
+ case 3: return AMDGPU::sub3;
}
}
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
index 9a8b859..3812eb7 100644
--- a/lib/Target/R600/R600RegisterInfo.td
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -19,7 +19,7 @@ class R600RegWithChan <string name, bits<9> sel, string chan> :
class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
- let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
+ let SubRegIndices = [sub0, sub1, sub2, sub3];
let HWEncoding = encoding;
}
@@ -126,9 +126,8 @@ class IndirectSuper<string n, list<Register> subregs> :
RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
let SubRegIndices =
- [indirect_0,indirect_1,indirect_2,indirect_3,indirect_4,indirect_5,indirect_6,
- indirect_7,indirect_8,indirect_9,indirect_10,indirect_11,indirect_12,
- indirect_13,indirect_14,indirect_15];
+ [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15];
}
def IndirectSuperReg : IndirectSuper<"Indirect",
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 85bb73a..50605d0 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1215,15 +1215,15 @@ def CLAMP_SI : CLAMP<VReg_32>;
def FABS_SI : FABS<VReg_32>;
def FNEG_SI : FNEG<VReg_32>;
-def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>;
-def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>;
-def : Extract_Element <f32, v4f32, VReg_128, 2, sel_z>;
-def : Extract_Element <f32, v4f32, VReg_128, 3, sel_w>;
+def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>;
+def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>;
+def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>;
+def : Extract_Element <f32, v4f32, VReg_128, 3, sub3>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sel_w>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sub0>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sub1>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sub2>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sub3>;
def : Vector_Build <v4f32, VReg_128, f32, VReg_32>;
def : Vector_Build <v4i32, SReg_128, i32, SReg_32>;
@@ -1338,22 +1338,22 @@ def : Pat <
def : Pat <
(int_AMDGPU_cube VReg_128:$src),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
- (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
- (EXTRACT_SUBREG VReg_128:$src, sel_y),
- (EXTRACT_SUBREG VReg_128:$src, sel_z),
- 0, 0, 0, 0), sel_x),
- (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
- (EXTRACT_SUBREG VReg_128:$src, sel_y),
- (EXTRACT_SUBREG VReg_128:$src, sel_z),
- 0, 0, 0, 0), sel_y),
- (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
- (EXTRACT_SUBREG VReg_128:$src, sel_y),
- (EXTRACT_SUBREG VReg_128:$src, sel_z),
- 0, 0, 0, 0), sel_z),
- (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
- (EXTRACT_SUBREG VReg_128:$src, sel_y),
- (EXTRACT_SUBREG VReg_128:$src, sel_z),
- 0, 0, 0, 0), sel_w)
+ (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub0),
+ (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub1),
+ (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub2),
+ (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub3)
>;
/********** ================== **********/
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index c3f1361..a1c7a86 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -1,18 +1,4 @@
-let Namespace = "AMDGPU" in {
- def low : SubRegIndex;
- def high : SubRegIndex;
-
- def sub0 : SubRegIndex;
- def sub1 : SubRegIndex;
- def sub2 : SubRegIndex;
- def sub3 : SubRegIndex;
- def sub4 : SubRegIndex;
- def sub5 : SubRegIndex;
- def sub6 : SubRegIndex;
- def sub7 : SubRegIndex;
-}
-
class SIReg <string n, bits<16> encoding = 0> : Register<n> {
let Namespace = "AMDGPU";
let HWEncoding = encoding;
@@ -20,7 +6,7 @@ class SIReg <string n, bits<16> encoding = 0> : Register<n> {
class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
- let SubRegIndices = [low, high];
+ let SubRegIndices = [sub0, sub1];
let HWEncoding = encoding;
}
@@ -73,12 +59,12 @@ def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "SGPR%u", 0, 101))>;
// SGPR 64-bit registers
-def SGPR_64 : RegisterTuples<[low, high],
+def SGPR_64 : RegisterTuples<[sub0, sub1],
[(add (decimate SGPR_32, 2)),
(add(decimate (rotl SGPR_32, 1), 2))]>;
// SGPR 128-bit registers
-def SGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w],
+def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
[(add (decimate SGPR_32, 4)),
(add (decimate (rotl SGPR_32, 1), 4)),
(add (decimate (rotl SGPR_32, 2), 4)),
@@ -104,12 +90,12 @@ def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "VGPR%u", 0, 255))>;
// VGPR 64-bit registers
-def VGPR_64 : RegisterTuples<[low, high],
+def VGPR_64 : RegisterTuples<[sub0, sub1],
[(add VGPR_32),
(add (rotl VGPR_32, 1))]>;
// VGPR 128-bit registers
-def VGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w],
+def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
[(add VGPR_32),
(add (rotl VGPR_32, 1)),
(add (rotl VGPR_32, 2)),
--
1.7.10.4
More information about the mesa-dev mailing list