Mesa (master): radeonsi: Remove use.sgpr* intrinsics, use load instructions instead

Tom Stellard tstellar at kemper.freedesktop.org
Tue May 29 17:14:30 UTC 2012


Module: Mesa
Branch: master
Commit: 89ece086bcd2186ab53cb6a69d53005893cab0ea
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=89ece086bcd2186ab53cb6a69d53005893cab0ea

Author: Tom Stellard <thomas.stellard at amd.com>
Date:   Tue May 29 11:36:29 2012 -0400

radeonsi: Remove use.sgpr* intrinsics, use load instructions instead

We now model loading user sgpr values with LLVM IR load instructions that
use the USER_SGPR address space.

The definition of the sgpr parameter to the use_sgpr() helper function
in radeonsi_shader.c has changed so that you can pass the raw sgpr index
rather than having to divide the index of the sgpr you want to use by the
dword width of the type you want to load.

---

 src/gallium/drivers/radeon/AMDIL.h             |    3 +-
 src/gallium/drivers/radeon/SIISelLowering.cpp  |    3 +
 src/gallium/drivers/radeon/SIInstructions.td   |   40 +++++-------
 src/gallium/drivers/radeon/SIIntrinsics.td     |    5 --
 src/gallium/drivers/radeonsi/radeonsi_shader.c |   80 +++++++++++-------------
 5 files changed, 57 insertions(+), 74 deletions(-)

diff --git a/src/gallium/drivers/radeon/AMDIL.h b/src/gallium/drivers/radeon/AMDIL.h
index 45638f6..8bd024a 100644
--- a/src/gallium/drivers/radeon/AMDIL.h
+++ b/src/gallium/drivers/radeon/AMDIL.h
@@ -129,7 +129,8 @@ enum AddressSpaces {
   ADDRESS_NONE     = 5, // Address space for unknown memory.
   PARAM_D_ADDRESS  = 6, // Address space for direct addressible parameter memory (CONST0)
   PARAM_I_ADDRESS  = 7, // Address space for indirect addressible parameter memory (VTX1)
-  LAST_ADDRESS     = 8
+  USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI
+  LAST_ADDRESS     = 9
 };
 
 // This union/struct combination is an easy way to read out the
diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp
index a79aba9..1cdcd36 100644
--- a/src/gallium/drivers/radeon/SIISelLowering.cpp
+++ b/src/gallium/drivers/radeon/SIISelLowering.cpp
@@ -188,6 +188,9 @@ void SITargetLowering::lowerUSE_SGPR(MachineInstr *MI,
   unsigned dstReg = MI->getOperand(0).getReg();
   int64_t newIndex = MI->getOperand(1).getImm();
   const TargetRegisterClass * dstClass = MRI.getRegClass(dstReg);
+  unsigned DwordWidth = dstClass->getSize() / 4;
+  assert(newIndex % DwordWidth == 0 && "USER_SGPR not properly aligned");
+  newIndex = newIndex / DwordWidth;
 
   unsigned newReg = dstClass->getRegister(newIndex);
   addLiveIn(MI, MF, MRI, TII, newReg); 
diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td
index a77b8bd..fc8ec4a 100644
--- a/src/gallium/drivers/radeon/SIInstructions.td
+++ b/src/gallium/drivers/radeon/SIInstructions.td
@@ -7,6 +7,18 @@
 //
 //===----------------------------------------------------------------------===//
 
+def load_user_sgpr : PatFrag<(ops node:$ptr),
+  (load node:$ptr),
+  [{
+    const Value *Src = cast<LoadSDNode>(N)->getSrcValue();
+    if (Src) {
+      PointerType * PT = dyn_cast<PointerType>(Src->getType());
+      return PT && PT->getAddressSpace() == AMDILAS::USER_SGPR_ADDRESS;
+    }
+    return false;
+  }]
+>;
+
 
 def isSI : Predicate<"Subtarget.device()"
                             "->getGeneration() == AMDILDeviceInfo::HD7XXX">;
@@ -826,26 +838,19 @@ def SI_INTERP_CONST : InstSI <
                                                  imm:$attr, SReg_32:$params))]
 >;
 
-
 def USE_SGPR_32 : InstSI <
   (outs SReg_32:$dst),
   (ins i32imm:$src0),
   "USE_SGPR_32",
-  [(set SReg_32:$dst, (int_SI_use_sgpr imm:$src0))]
-
-> {
-  field bits<32> Inst = 0;
-}
+  [(set (i32 SReg_32:$dst), (load_user_sgpr imm:$src0))]
+>;
 
 def USE_SGPR_64 : InstSI <
   (outs SReg_64:$dst),
   (ins i32imm:$src0),
   "USE_SGPR_64",
-  [(set SReg_64:$dst, (int_SI_use_sgpr imm:$src0))]
-
-> {
-  field bits<32> Inst = 0;
-}
+  [(set (i64 SReg_64:$dst), (load_user_sgpr imm:$src0))]
+>;
 
 def VS_LOAD_BUFFER_INDEX : InstSI <
   (outs VReg_32:$dst),
@@ -869,19 +874,6 @@ def : Pat<
                            0, 0, (i32 SREG_LIT_0))
 >;
 
-def : Pat<
-  (int_SI_use_sgprptrcf32 imm:$src0),
-  (USE_SGPR_64 imm:$src0)
->;
-def : Pat<
-  (int_SI_use_sgprptrci128 imm:$src0),
-  (USE_SGPR_64 imm:$src0)
->;
-def : Pat<
-  (int_SI_use_sgprptrci256 imm:$src0),
-  (USE_SGPR_64 imm:$src0)
->;
-
 /* int_SI_export */
 def : Pat <
   (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
diff --git a/src/gallium/drivers/radeon/SIIntrinsics.td b/src/gallium/drivers/radeon/SIIntrinsics.td
index d8bf4fa..95273a2 100644
--- a/src/gallium/drivers/radeon/SIIntrinsics.td
+++ b/src/gallium/drivers/radeon/SIIntrinsics.td
@@ -21,11 +21,6 @@ let TargetPrefix = "SI", isTarget = 1 in {
   def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ;
 
   def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>;
-  def int_SI_use_sgpr : Intrinsic <[llvm_anyint_ty], [llvm_i32_ty], [IntrNoMem]>;
-  class int_SI_use_sgprptr : Intrinsic <[llvm_anyptr_ty], [llvm_i32_ty], []>;
-  def int_SI_use_sgprptrcf32 : int_SI_use_sgprptr;
-  def int_SI_use_sgprptrci128 : int_SI_use_sgprptr;
-  def int_SI_use_sgprptrci256 : int_SI_use_sgprptr;
 
   /* Interpolation Intrinsics */
 
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index 4f99549..e625a74 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -67,6 +67,7 @@ static struct si_shader_context * si_shader_context(
 
 #define USE_SGPR_MAX_SUFFIX_LEN 5
 #define CONST_ADDR_SPACE 2
+#define USER_SGPR_ADDR_SPACE 8
 
 enum sgpr_type {
 	SGPR_CONST_PTR_F32,
@@ -99,10 +100,19 @@ static LLVMValueRef build_indexed_load(
 	return LLVMBuildLoad(gallivm->builder, computed_ptr, "");
 }
 
-/*
- * XXX: Instead of using an intrinsic to use a specific SGPR, we should be
- * using load instructions.  The loads should load from the USER_SGPR address
- * space and use the sgpr index as the pointer.
+/**
+ * Load a value stored in one of the user SGPRs
+ *
+ * @param sgpr This is the sgpr to load the value from.  If you need to load a
+ * value that is stored in consecutive SGPR registers (e.g. a 64-bit pointer),
+ * then you should pass the index of the first SGPR that holds the value.  For
+ * example, if you want to load a pointer that is stored in SGPRs 2 and 3, then
+ * pass 2 for the sgpr parameter.
+ *
+ * The value of the sgpr parameter must also be aligned to the width of the type
+ * being loaded, so that the sgpr parameter is divisible by the dword width of the
+ * type.  For example, if the value being loaded is two dwords wide, then the sgpr
+ * parameter must be divisible by two.
  */
 static LLVMValueRef use_sgpr(
 	struct gallivm_state * gallivm,
@@ -111,44 +121,48 @@ static LLVMValueRef use_sgpr(
 {
 	LLVMValueRef sgpr_index;
 	LLVMTypeRef ret_type;
+	LLVMValueRef ptr;
 
 	sgpr_index = lp_build_const_int32(gallivm, sgpr);
 
 	switch (type) {
 	case SGPR_CONST_PTR_F32:
+		assert(sgpr % 2 == 0);
 		ret_type = LLVMFloatTypeInContext(gallivm->context);
 		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
-		return lp_build_intrinsic_unary(gallivm->builder,
-						"llvm.SI.use.sgprptrcf32.",
-						ret_type, sgpr_index);
+		break;
+
 	case SGPR_I32:
 		ret_type = LLVMInt32TypeInContext(gallivm->context);
-		return lp_build_intrinsic_unary(gallivm->builder,
-						"llvm.SI.use.sgpr.i32",
-						ret_type, sgpr_index);
+		break;
+
 	case SGPR_I64:
+		assert(sgpr % 2 == 0);
 		ret_type= LLVMInt64TypeInContext(gallivm->context);
-		return lp_build_intrinsic_unary(gallivm->builder,
-				"llvm.SI.use.sgpr.i64",
-				 ret_type, sgpr_index);
+		break;
+
 	case SGPR_CONST_PTR_V4I32:
+		assert(sgpr % 2 == 0);
 		ret_type = LLVMInt32TypeInContext(gallivm->context);
 		ret_type = LLVMVectorType(ret_type, 4);
 		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
-		return lp_build_intrinsic_unary(gallivm->builder,
-						"llvm.SI.use.sgprptrci128.",
-						ret_type, sgpr_index);
+		break;
+
 	case SGPR_CONST_PTR_V8I32:
+		assert(sgpr % 2 == 0);
 		ret_type = LLVMInt32TypeInContext(gallivm->context);
 		ret_type = LLVMVectorType(ret_type, 8);
 		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
-		return lp_build_intrinsic_unary(gallivm->builder,
-						"llvm.SI.use.sgprptrci256.",
-						ret_type, sgpr_index);
+		break;
+
 	default:
 		assert(!"Unsupported SGPR type in use_sgpr()");
 		return NULL;
 	}
+
+	ret_type = LLVMPointerType(ret_type, USER_SGPR_ADDR_SPACE);
+	ptr = LLVMBuildIntToPtr(gallivm->builder, sgpr_index, ret_type, "");
+	return LLVMBuildLoad(gallivm->builder, ptr, "");
 }
 
 static void declare_input_vs(
@@ -174,7 +188,7 @@ static void declare_input_vs(
 	/* XXX: Communicate with the rest of the driver about which SGPR the T#
 	 * list pointer is going to be stored in.  Hard code to SGPR[6:7] for
  	 * now */
-	t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 3);
+	t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 6);
 
 	t_offset = lp_build_const_int32(base->gallivm, velem->vertex_buffer_index);
 
@@ -320,27 +334,6 @@ static LLVMValueRef fetch_constant(
 	return build_indexed_load(base->gallivm, const_ptr, offset);
 }
 
-
-/* Declare some intrinsics with the correct attributes */
-static void si_llvm_emit_prologue(struct lp_build_tgsi_context * bld_base)
-{
-	LLVMValueRef function;
-	struct gallivm_state * gallivm = bld_base->base.gallivm;
-
-	LLVMTypeRef i64 = LLVMInt64TypeInContext(gallivm->context);
-	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
-
-	/* declare i32 @llvm.SI.use.sgpr.i32(i32) */
-	function = lp_declare_intrinsic(gallivm->module, "llvm.SI.use.sgpr.i32",
-					i32, &i32, 1);
-	LLVMAddFunctionAttr(function, LLVMReadNoneAttribute);
-
-	/* declare i64 @llvm.SI.use.sgpr.i64(i32) */
-	function = lp_declare_intrinsic(gallivm->module, "llvm.SI.use.sgpr.i64",
-					i64, &i32, 1);
-	LLVMAddFunctionAttr(function, LLVMReadNoneAttribute);
-}
-
 /* XXX: This is partially implemented for VS only at this point.  It is not complete */
 static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
 {
@@ -504,14 +497,14 @@ static void tex_fetch_args(
 							0, LP_CHAN_ALL);
 
 	/* Resource */
-	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 2);
+	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 4);
 	offset = lp_build_const_int32(bld_base->base.gallivm,
 				  8 * emit_data->inst->Src[1].Register.Index);
 	emit_data->args[2] = build_indexed_load(bld_base->base.gallivm,
 						ptr, offset);
 
 	/* Sampler */
-	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 1);
+	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 2);
 	offset = lp_build_const_int32(bld_base->base.gallivm,
 				  4 * emit_data->inst->Src[1].Register.Index);
 	emit_data->args[3] = build_indexed_load(bld_base->base.gallivm,
@@ -557,7 +550,6 @@ int si_pipe_shader_create(
 	tgsi_scan_shader(shader->tokens, &shader_info);
 	bld_base->info = &shader_info;
 	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
-	bld_base->emit_prologue = si_llvm_emit_prologue;
 	bld_base->emit_epilogue = si_llvm_emit_epilogue;
 
 	bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;




More information about the mesa-commit mailing list