[Mesa-dev] [PATCH 2/4] R600/SI: Add basic support for more integer vector types.
Michel Dänzer
michel at daenzer.net
Fri Feb 1 07:39:44 PST 2013
From: Michel Dänzer <michel.daenzer at amd.com>
v1i32, v2i32, v8i32 and v16i32.
Only add VGPR register classes for integer vector types, to avoid attempts
copying from VGPR to SGPR registers, which is not possible.
Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
---
lib/Target/R600/AMDGPUAsmPrinter.cpp | 6 ++++
lib/Target/R600/AMDGPUInstructions.td | 51 +++++++++++++++++++++++++++++++++
lib/Target/R600/SIISelLowering.cpp | 7 +++--
lib/Target/R600/SIInstructions.td | 16 +++++++----
lib/Target/R600/SIRegisterInfo.td | 41 ++++++++++++++++++++++++--
5 files changed, 110 insertions(+), 11 deletions(-)
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index c057a8e..a8c9bb4 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -118,6 +118,12 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
} else if (AMDGPU::SReg_256RegClass.contains(reg)) {
isSGPR = true;
width = 8;
+ } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 8;
+ } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 16;
} else {
assert(!"Unknown register class");
}
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 6765bc8..0559a5a 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -191,6 +191,19 @@ class Insert_Element <ValueType elem_type, ValueType vec_type,
>;
// Vector Build pattern
+class Vector1_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$src))),
+ (vecType elemClass:$src)
+>;
+
+class Vector2_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1))),
+ (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1)
+>;
+
class Vector_Build <ValueType vecType, RegisterClass vectorClass,
ValueType elemType, RegisterClass elemClass> : Pat <
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
@@ -200,6 +213,44 @@ class Vector_Build <ValueType vecType, RegisterClass vectorClass,
elemClass:$z, sub2), elemClass:$w, sub3)
>;
+class Vector8_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1),
+ (elemType elemClass:$sub2), (elemType elemClass:$sub3),
+ (elemType elemClass:$sub4), (elemType elemClass:$sub5),
+ (elemType elemClass:$sub6), (elemType elemClass:$sub7))),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1),
+ elemClass:$sub2, sub2), elemClass:$sub3, sub3),
+ elemClass:$sub4, sub4), elemClass:$sub5, sub5),
+ elemClass:$sub6, sub6), elemClass:$sub7, sub7)
+>;
+
+class Vector16_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1),
+ (elemType elemClass:$sub2), (elemType elemClass:$sub3),
+ (elemType elemClass:$sub4), (elemType elemClass:$sub5),
+ (elemType elemClass:$sub6), (elemType elemClass:$sub7),
+ (elemType elemClass:$sub8), (elemType elemClass:$sub9),
+ (elemType elemClass:$sub10), (elemType elemClass:$sub11),
+ (elemType elemClass:$sub12), (elemType elemClass:$sub13),
+ (elemType elemClass:$sub14), (elemType elemClass:$sub15))),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1),
+ elemClass:$sub2, sub2), elemClass:$sub3, sub3),
+ elemClass:$sub4, sub4), elemClass:$sub5, sub5),
+ elemClass:$sub6, sub6), elemClass:$sub7, sub7),
+ elemClass:$sub8, sub8), elemClass:$sub9, sub9),
+ elemClass:$sub10, sub10), elemClass:$sub11, sub11),
+ elemClass:$sub12, sub12), elemClass:$sub13, sub13),
+ elemClass:$sub14, sub14), elemClass:$sub15, sub15)
+>;
+
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
(dt (bitconvert (st rc:$src0))),
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 11ba452..202584b 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -34,8 +34,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass);
addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass);
- addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
- addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
+ addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass);
+ addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass);
+ addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
+ addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
+ addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass);
computeRegisterProperties();
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 50605d0..54ea39a 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -584,7 +584,7 @@ defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
- [(set VReg_32:$dst, (fp_to_sint AllReg_32:$src0))]
+ [(set (i32 VReg_32:$dst), (fp_to_sint AllReg_32:$src0))]
>;
defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
@@ -998,17 +998,17 @@ def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
-class V_MOV_IMM <Operand immType, SDNode immNode> : InstSI <
+class V_MOV_IMM <ValueType type, Operand immType, SDNode immNode> : InstSI <
(outs VReg_32:$dst),
(ins immType:$src0),
"V_MOV_IMM",
- [(set VReg_32:$dst, (immNode:$src0))]
+ [(set VReg_32:$dst, (type immNode:$src0))]
>;
let isCodeGenOnly = 1, isPseudo = 1 in {
-def V_MOV_IMM_I32 : V_MOV_IMM<i32imm, imm>;
-def V_MOV_IMM_F32 : V_MOV_IMM<f32imm, fpimm>;
+def V_MOV_IMM_I32 : V_MOV_IMM<i32, i32imm, imm>;
+def V_MOV_IMM_F32 : V_MOV_IMM<f32, f32imm, fpimm>;
def S_MOV_IMM_I32 : InstSI <
(outs SReg_32:$dst),
@@ -1225,8 +1225,12 @@ def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sub1>;
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sub2>;
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sub3>;
+def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>;
+def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>;
def : Vector_Build <v4f32, VReg_128, f32, VReg_32>;
-def : Vector_Build <v4i32, SReg_128, i32, SReg_32>;
+def : Vector_Build <v4i32, VReg_128, i32, VReg_32>;
+def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>;
+def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>;
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <i32, f32, VReg_32>;
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index a1c7a86..9b483eb 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -101,6 +101,37 @@ def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
(add (rotl VGPR_32, 2)),
(add (rotl VGPR_32, 3))]>;
+// VGPR 256-bit registers
+def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
+ [(add VGPR_32),
+ (add (rotl VGPR_32, 1)),
+ (add (rotl VGPR_32, 2)),
+ (add (rotl VGPR_32, 3)),
+ (add (rotl VGPR_32, 4)),
+ (add (rotl VGPR_32, 5)),
+ (add (rotl VGPR_32, 6)),
+ (add (rotl VGPR_32, 7))]>;
+
+// VGPR 512-bit registers
+def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
+ [(add VGPR_32),
+ (add (rotl VGPR_32, 1)),
+ (add (rotl VGPR_32, 2)),
+ (add (rotl VGPR_32, 3)),
+ (add (rotl VGPR_32, 4)),
+ (add (rotl VGPR_32, 5)),
+ (add (rotl VGPR_32, 6)),
+ (add (rotl VGPR_32, 7)),
+ (add (rotl VGPR_32, 8)),
+ (add (rotl VGPR_32, 9)),
+ (add (rotl VGPR_32, 10)),
+ (add (rotl VGPR_32, 11)),
+ (add (rotl VGPR_32, 12)),
+ (add (rotl VGPR_32, 13)),
+ (add (rotl VGPR_32, 14)),
+ (add (rotl VGPR_32, 15))]>;
+
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add SGPR_32, SREG_LIT_0, M0, EXEC_LO, EXEC_HI)
@@ -115,7 +146,7 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>;
def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>;
// Register class for all vector registers (VGPRs + Interploation Registers)
-def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32,
(add VGPR_32,
PERSP_SAMPLE_I, PERSP_SAMPLE_J,
PERSP_CENTER_I, PERSP_CENTER_J,
@@ -136,9 +167,13 @@ def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
)
>;
-def VReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add VGPR_64)>;
+def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>;
+
+def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>;
+
+def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>;
-def VReg_128 : RegisterClass<"AMDGPU", [v4f32], 128, (add VGPR_128)>;
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
// AllReg_* - A set of all scalar and vector registers of a given width.
def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add VReg_32, SReg_32)>;
--
1.7.10.4
More information about the mesa-dev
mailing list