[Mesa-dev] [PATCH 2/2] radeonsi: Use pointers rather than resource descriptors for shader constants

Tom Stellard tom at stellard.net
Fri Jul 19 13:18:07 PDT 2013


From: Tom Stellard <thomas.stellard at amd.com>

The TGSI->LLVM pass for radeonsi preloads constants and relies on LLVM's
machine sink pass to reduce SGPR usage by moving constant reads down to
the optimal place in the code.  However, the machine sink pass will not
sink instructions that have been selected from llvm.SI.load.const
intrinsics, because these instructions do not have a MachineMemOperand,
which LLVM needs in order to determine whether or not it is safe to sink
a load.  Replacing this intrinsic with a real load instruction will
enable the sinking optimization and probably a few others.

The other advantages of using pointers are:
+ Reduced register usage (pointers take 2 registers, descriptors take 4)
+ More code sharing with compute

This should also fix some crashes caused by the compiler running out of
registers, such as the one reported in this bug:

https://bugs.freedesktop.org/show_bug.cgi?id=66805
---
 src/gallium/drivers/radeonsi/radeonsi_shader.c | 26 ++++++++++++++++++++------
 src/gallium/drivers/radeonsi/si_state_draw.c   |  4 ++++
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index 4d8a479..eb63fc9 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -115,20 +115,26 @@ static LLVMValueRef build_indexed_load(
 	return result;
 }
 
-static LLVMValueRef build_constant_load(
+static LLVMValueRef build_load_constant(
 	struct si_shader_context * si_shader_ctx,
 	LLVMValueRef base_ptr,
 	LLVMValueRef offset)
 {
 	struct lp_build_context * base =
 				&si_shader_ctx->radeon_bld.soa.bld_base.base;
+#if HAVE_LLVM <= 0x0303
 	LLVMValueRef args[2];
 	args[0] = base_ptr;
 	args[1] = offset;
 	return build_intrinsic(base->gallivm->builder, "llvm.SI.load.const",
 				base->elem_type, args, 2,
 				LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
-
+#else
+	LLVMValueRef dword_offset = LLVMBuildUDiv(base->gallivm->builder,
+				offset,
+				lp_build_const_int32(base->gallivm, 4), "");
+	return build_indexed_load(si_shader_ctx, base_ptr, dword_offset);
+#endif
 }
 
 static LLVMValueRef get_instance_index(
@@ -450,7 +456,7 @@ static LLVMValueRef fetch_constant(
 	addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16);
 	args[1] = lp_build_add(&bld_base->uint_bld, addr, args[1]);
 
-	result = build_constant_load(si_shader_ctx, args[0], args[1]);
+	result = build_load_constant(si_shader_ctx, args[0], args[1]);
 
 	return bitcast(bld_base, type, result);
 }
@@ -609,7 +615,7 @@ static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base,
 				args[1] = lp_build_const_int32(base->gallivm,
 							       ((reg_index * 4 + chan) * 4 +
 								const_chan) * 4);
-				base_elt = build_constant_load(si_shader_ctx, args[0], args[1]);
+				base_elt = build_load_constant(si_shader_ctx, args[0], args[1]);
 				args[5 + chan] =
 					lp_build_add(base, args[5 + chan],
 						     lp_build_mul(base, base_elt,
@@ -1215,8 +1221,16 @@ static void create_function(struct si_shader_context *si_shader_ctx)
 	v2i32 = LLVMVectorType(i32, 2);
 	v3i32 = LLVMVectorType(i32, 3);
 
-	params[SI_PARAM_CONST] = LLVMPointerType(LLVMVectorType(i8, 16), CONST_ADDR_SPACE);
+#if HAVE_LLVM <= 0x0303
+	params[SI_PARAM_CONST] = LLVMPointerType(LLVMVectorType(i8, 16),
+							CONST_ADDR_SPACE);
 	params[SI_PARAM_SAMPLER] = params[SI_PARAM_CONST];
+#else
+	params[SI_PARAM_CONST] = LLVMPointerType(LLVMPointerType(f32,
+				 CONST_ADDR_SPACE), CONST_ADDR_SPACE);
+	params[SI_PARAM_SAMPLER] = LLVMPointerType(LLVMVectorType(i8, 16),
+							CONST_ADDR_SPACE);
+#endif
 	params[SI_PARAM_RESOURCE] = LLVMPointerType(LLVMVectorType(i8, 32), CONST_ADDR_SPACE);
 
 	if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
@@ -1298,7 +1312,7 @@ static void preload_constants(struct si_shader_context *si_shader_ctx)
 			si_shader_ctx->const_resource,
 			lp_build_const_int32(gallivm, i * 4)
 		};
-		si_shader_ctx->constants[i] = build_constant_load(si_shader_ctx,
+		si_shader_ctx->constants[i] = build_load_constant(si_shader_ctx,
 							args[0], args[1]);
 	}
 }
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 29d960d..efbee0d 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -494,6 +494,7 @@ static void si_constant_buffer_update(struct r600_context *rctx)
 				si_pm4_sh_data_add(pm4, va);
 				si_pm4_sh_data_add(pm4, (S_008F04_BASE_ADDRESS_HI(va >> 32) |
 							 S_008F04_STRIDE(0)));
+#if HAVE_LLVM <= 0x0303
 				si_pm4_sh_data_add(pm4, cb->buffer_size);
 				si_pm4_sh_data_add(pm4, S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
 						   S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
@@ -501,12 +502,15 @@ static void si_constant_buffer_update(struct r600_context *rctx)
 						   S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
 						   S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
 						   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32));
+#endif
 			} else {
 				/* Fill in an empty T# buffer resource description */
 				si_pm4_sh_data_add(pm4, 0);
 				si_pm4_sh_data_add(pm4, 0);
+#if HAVE_LLVM <= 0x0303
 				si_pm4_sh_data_add(pm4, 0);
 				si_pm4_sh_data_add(pm4, 0);
+#endif
 			}
 		}
 
-- 
1.8.1.5



More information about the mesa-dev mailing list