[Mesa-dev] [PATCH 1/2] radeon/llvm: support for multiple const buffer
Vincent Lejeune
vljn at ovi.com
Mon Dec 17 09:42:02 PST 2012
---
lib/Target/AMDGPU/AMDGPUIntrinsics.td | 2 +-
.../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 26 +++-
lib/Target/AMDGPU/R600ISelLowering.cpp | 57 +++++++-
lib/Target/AMDGPU/R600Instructions.td | 4 +-
lib/Target/AMDGPU/R600RegisterInfo.td | 149 +++++++++++++++++++--
lib/Target/AMDGPU/SIInstructions.td | 6 +-
6 files changed, 223 insertions(+), 21 deletions(-)
diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/lib/Target/AMDGPU/AMDGPUIntrinsics.td
index 2ba2d4b..3bb2eee 100644
--- a/lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ b/lib/Target/AMDGPU/AMDGPUIntrinsics.td
@@ -13,7 +13,7 @@
let TargetPrefix = "AMDGPU", isTarget = 1 in {
- def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 018234a..94f169d 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -307,11 +307,29 @@ void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
// value of the source select is defined in the r600isa docs.
if (MO.isReg()) {
unsigned Reg = MO.getReg();
- if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
- EmitByte(1, OS);
- } else {
- EmitByte(0, OS);
+ unsigned EmittedBank = 0;
+ unsigned BankClassId[16] = {
+ AMDGPU::R600_KC0_CReg32RegClassID,
+ AMDGPU::R600_KC1_CReg32RegClassID,
+ AMDGPU::R600_KC2_CReg32RegClassID,
+ AMDGPU::R600_KC3_CReg32RegClassID,
+ AMDGPU::R600_KC4_CReg32RegClassID,
+ AMDGPU::R600_KC5_CReg32RegClassID,
+ AMDGPU::R600_KC6_CReg32RegClassID,
+ AMDGPU::R600_KC7_CReg32RegClassID,
+ AMDGPU::R600_KC8_CReg32RegClassID,
+ AMDGPU::R600_KC9_CReg32RegClassID,
+ AMDGPU::R600_KC10_CReg32RegClassID,
+ AMDGPU::R600_KC11_CReg32RegClassID,
+ AMDGPU::R600_KC12_CReg32RegClassID,
+ AMDGPU::R600_KC13_CReg32RegClassID,
+ AMDGPU::R600_KC14_CReg32RegClassID,
+ AMDGPU::R600_KC15_CReg32RegClassID};
+ for (unsigned i = 0; i < 16; i++) {
+ if (AMDGPUMCRegisterClasses[BankClassId[i]].contains(Reg))
+ EmittedBank = i + 1;
}
+ EmitByte(EmittedBank, OS);
if (Reg == AMDGPU::ALU_LITERAL_X) {
unsigned ImmOpIndex = MI.getNumOperands() - 1;
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 3a4283c..16d2280 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -161,7 +161,62 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::R600_LOAD_CONST: {
int64_t RegIndex = MI->getOperand(1).getImm();
- unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
+ int64_t BankIndex = MI->getOperand(2).getImm();
+ unsigned ConstantReg;
+ switch (BankIndex) {
+ case 0:
+ ConstantReg = AMDGPU::R600_KC0_CReg32RegClass.getRegister(RegIndex);
+ break;
+ case 1:
+ ConstantReg = AMDGPU::R600_KC1_CReg32RegClass.getRegister(RegIndex);
+ break;
+ case 2:
+ ConstantReg = AMDGPU::R600_KC2_CReg32RegClass.getRegister(RegIndex);
+ break;
+ case 3:
+ ConstantReg = AMDGPU::R600_KC3_CReg32RegClass.getRegister(RegIndex);
+ break;
+ case 4:
+ ConstantReg = AMDGPU::R600_KC4_CReg32RegClass.getRegister(RegIndex);
+ break;
+ case 5:
+ ConstantReg = AMDGPU::R600_KC5_CReg32RegClass.getRegister(RegIndex);
+ break;
+ case 6:
+ ConstantReg = AMDGPU::R600_KC6_CReg32RegClass.getRegister(RegIndex);
+ break;
+ case 7:
+ ConstantReg = AMDGPU::R600_KC7_CReg32RegClass.getRegister(RegIndex);
+ break;
+ case 8:
+ ConstantReg = AMDGPU::R600_KC8_CReg32RegClass.getRegister(RegIndex);
+ break;
+ case 9:
+ ConstantReg = AMDGPU::R600_KC9_CReg32RegClass.getRegister(RegIndex);
+ break;
+ case 10:
+ ConstantReg = AMDGPU::R600_KC10_CReg32RegClass.getRegister(RegIndex);
+ break;
+ case 11:
+ ConstantReg = AMDGPU::R600_KC11_CReg32RegClass.getRegister(RegIndex);
+ break;
+ case 12:
+ ConstantReg = AMDGPU::R600_KC12_CReg32RegClass.getRegister(RegIndex);
+ break;
+ case 13:
+ ConstantReg = AMDGPU::R600_KC13_CReg32RegClass.getRegister(RegIndex);
+ break;
+ case 14:
+ ConstantReg = AMDGPU::R600_KC14_CReg32RegClass.getRegister(RegIndex);
+ break;
+ case 15:
+ ConstantReg = AMDGPU::R600_KC15_CReg32RegClass.getRegister(RegIndex);
+ break;
+ default:
+ assert( 0 && "Not a valid Bank Index !");
+ }
+
+
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
.addOperand(MI->getOperand(0))
.addReg(ConstantReg);
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index d89b03b..621ed09 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -1572,9 +1572,9 @@ def MASK_WRITE : AMDGPUShaderInst <
def R600_LOAD_CONST : AMDGPUShaderInst <
(outs R600_Reg32:$dst),
- (ins i32imm:$src0),
+ (ins i32imm:$src0, i32imm:$src1),
"R600_LOAD_CONST $dst, $src0",
- [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
+ [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0, imm:$src1))]
>;
def RESERVE_REG : AMDGPUShaderInst <
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td b/lib/Target/AMDGPU/R600RegisterInfo.td
index 3b21825..577938d 100644
--- a/lib/Target/AMDGPU/R600RegisterInfo.td
+++ b/lib/Target/AMDGPU/R600RegisterInfo.td
@@ -28,10 +28,6 @@ foreach Index = 0-127 in {
// 32-bit Temporary Registers
def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
- // 32-bit Constant Registers (There are more than 128, this the number
- // that is currently supported.
- def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>;
-
// Indirect addressing offset registers
def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
Index, Chan>;
@@ -45,6 +41,16 @@ foreach Index = 0-127 in {
Index>;
}
+foreach Index = 0 - 255 in {
+ foreach Bank = 0 - 15 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit Constant Registers (the hardware exposes more constants than
+ // this; 256 registers per constant-buffer bank is the number that is
+ // currently supported)
+ def KC#Bank#_C#Index#_#Chan : R600RegWithChan <"KC"#Bank#"_C"#Index#"."#Chan, Index, Chan>;
+ }
+ }
+}
+
// Array Base Register holding input in FS
foreach Index = 448-464 in {
def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>;
@@ -80,12 +86,135 @@ def R600_Addr : RegisterClass <"AMDGPU", [i32], 127,
} // End isAllocatable = 0
-def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (interleave
- (interleave (sequence "C%u_X", 0, 127),
- (sequence "C%u_Z", 0, 127)),
- (interleave (sequence "C%u_Y", 0, 127),
- (sequence "C%u_W", 0, 127))))>;
+def R600_KC0_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "KC0_C%u_X", 0, 255),
+ (sequence "KC0_C%u_Z", 0, 255)),
+ (interleave (sequence "KC0_C%u_Y", 0, 255),
+ (sequence "KC0_C%u_W", 0, 255))))>;
+
+def R600_KC1_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "KC1_C%u_X", 0, 255),
+ (sequence "KC1_C%u_Z", 0, 255)),
+ (interleave (sequence "KC1_C%u_Y", 0, 255),
+ (sequence "KC1_C%u_W", 0, 255))))>;
+
+def R600_KC2_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "KC2_C%u_X", 0, 255),
+ (sequence "KC2_C%u_Z", 0, 255)),
+ (interleave (sequence "KC2_C%u_Y", 0, 255),
+ (sequence "KC2_C%u_W", 0, 255))))>;
+
+def R600_KC3_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "KC3_C%u_X", 0, 255),
+ (sequence "KC3_C%u_Z", 0, 255)),
+ (interleave (sequence "KC3_C%u_Y", 0, 255),
+ (sequence "KC3_C%u_W", 0, 255))))>;
+
+def R600_KC4_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "KC4_C%u_X", 0, 255),
+ (sequence "KC4_C%u_Z", 0, 255)),
+ (interleave (sequence "KC4_C%u_Y", 0, 255),
+ (sequence "KC4_C%u_W", 0, 255))))>;
+
+def R600_KC5_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "KC5_C%u_X", 0, 255),
+ (sequence "KC5_C%u_Z", 0, 255)),
+ (interleave (sequence "KC5_C%u_Y", 0, 255),
+ (sequence "KC5_C%u_W", 0, 255))))>;
+
+def R600_KC6_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "KC6_C%u_X", 0, 255),
+ (sequence "KC6_C%u_Z", 0, 255)),
+ (interleave (sequence "KC6_C%u_Y", 0, 255),
+ (sequence "KC6_C%u_W", 0, 255))))>;
+
+def R600_KC7_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "KC7_C%u_X", 0, 255),
+ (sequence "KC7_C%u_Z", 0, 255)),
+ (interleave (sequence "KC7_C%u_Y", 0, 255),
+ (sequence "KC7_C%u_W", 0, 255))))>;
+
+def R600_KC8_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "KC8_C%u_X", 0, 255),
+ (sequence "KC8_C%u_Z", 0, 255)),
+ (interleave (sequence "KC8_C%u_Y", 0, 255),
+ (sequence "KC8_C%u_W", 0, 255))))>;
+
+def R600_KC9_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "KC9_C%u_X", 0, 255),
+ (sequence "KC9_C%u_Z", 0, 255)),
+ (interleave (sequence "KC9_C%u_Y", 0, 255),
+ (sequence "KC9_C%u_W", 0, 255))))>;
+
+def R600_KC10_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "KC10_C%u_X", 0, 255),
+ (sequence "KC10_C%u_Z", 0, 255)),
+ (interleave (sequence "KC10_C%u_Y", 0, 255),
+ (sequence "KC10_C%u_W", 0, 255))))>;
+
+def R600_KC11_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "KC11_C%u_X", 0, 255),
+ (sequence "KC11_C%u_Z", 0, 255)),
+ (interleave (sequence "KC11_C%u_Y", 0, 255),
+ (sequence "KC11_C%u_W", 0, 255))))>;
+
+def R600_KC12_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "KC12_C%u_X", 0, 255),
+ (sequence "KC12_C%u_Z", 0, 255)),
+ (interleave (sequence "KC12_C%u_Y", 0, 255),
+ (sequence "KC12_C%u_W", 0, 255))))>;
+
+def R600_KC13_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "KC13_C%u_X", 0, 255),
+ (sequence "KC13_C%u_Z", 0, 255)),
+ (interleave (sequence "KC13_C%u_Y", 0, 255),
+ (sequence "KC13_C%u_W", 0, 255))))>;
+
+def R600_KC14_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "KC14_C%u_X", 0, 255),
+ (sequence "KC14_C%u_Z", 0, 255)),
+ (interleave (sequence "KC14_C%u_Y", 0, 255),
+ (sequence "KC14_C%u_W", 0, 255))))>;
+
+def R600_KC15_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "KC15_C%u_X", 0, 255),
+ (sequence "KC15_C%u_Z", 0, 255)),
+ (interleave (sequence "KC15_C%u_Y", 0, 255),
+ (sequence "KC15_C%u_W", 0, 255))))>;
+
+def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
+ R600_KC0_CReg32,
+ R600_KC1_CReg32,
+ R600_KC2_CReg32,
+ R600_KC3_CReg32,
+ R600_KC4_CReg32,
+ R600_KC5_CReg32,
+ R600_KC6_CReg32,
+ R600_KC7_CReg32,
+ R600_KC8_CReg32,
+ R600_KC9_CReg32,
+ R600_KC10_CReg32,
+ R600_KC11_CReg32,
+ R600_KC12_CReg32,
+ R600_KC13_CReg32,
+ R600_KC14_CReg32,
+ R600_KC15_CReg32)>;
def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "T%u_X", 0, 127))>;
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index e9bbe23..bcf0635 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -1050,9 +1050,9 @@ def SET_M0 : InstSI <
def LOAD_CONST : AMDGPUShaderInst <
(outs GPRF32:$dst),
- (ins i32imm:$src),
- "LOAD_CONST $dst, $src",
- [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
+ (ins i32imm:$src0),
+ "LOAD_CONST $dst, $src0",
+ [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src0, imm))]
>;
let usesCustomInserter = 1 in {
--
1.8.0.2
More information about the mesa-dev
mailing list