[Mesa-dev] [PATCH 2/2] radeonsi: Use pointers rather than resource descriptors for shader constants v3

Tom Stellard tom at stellard.net
Mon Aug 5 11:58:44 PDT 2013


From: Tom Stellard <thomas.stellard at amd.com>

The TGSI->LLVM pass for radeonsi preloads constants and relies on LLVM's
sinking pass to reduce SGPR usage by moving constant reads down to an
optimal place in the code.  However, LLVM's machine sink pass will not
sink instructions that have been selected from llvm.SI.load.const
intrinsics, because these instructions do not have a MachineMemOperand,
which LLVM needs in order to determine whether or not it is safe to sink
a load.  Replacing this intrinsic with a real load instruction will
enable the sinking optimization and probably a few others.

The other advantages of using pointers are:
+ Reduced register usage (pointers take 2 registers, descriptors take 4)
+ More code sharing with compute

This should also fix some crashes caused by the compiler running out of
registers, such as the one reported in this bug:

https://bugs.freedesktop.org/show_bug.cgi?id=66805

v2:
  - Mark constant loads as invariant, so the machine sink pass will
    actually sink them.

v3:
  - Correctly specify the invariant.load metadata
---
 src/gallium/drivers/radeonsi/radeonsi_shader.c | 31 +++++++++++++++++++++-----
 src/gallium/drivers/radeonsi/si_state_draw.c   |  4 ++++
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index 4d8a479..d251252 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -115,20 +115,31 @@ static LLVMValueRef build_indexed_load(
 	return result;
 }
 
-static LLVMValueRef build_constant_load(
+static LLVMValueRef build_load_constant(
 	struct si_shader_context * si_shader_ctx,
 	LLVMValueRef base_ptr,
 	LLVMValueRef offset)
 {
 	struct lp_build_context * base =
 				&si_shader_ctx->radeon_bld.soa.bld_base.base;
+#if HAVE_LLVM <= 0x0303
 	LLVMValueRef args[2];
 	args[0] = base_ptr;
 	args[1] = offset;
 	return build_intrinsic(base->gallivm->builder, "llvm.SI.load.const",
 				base->elem_type, args, 2,
 				LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
-
+#else
+	unsigned md_invariant_load = LLVMGetMDKindIDInContext(
+				base->gallivm->context,	"invariant.load", 14);
+	LLVMValueRef dword_offset = LLVMBuildUDiv(base->gallivm->builder,
+				offset,
+				lp_build_const_int32(base->gallivm, 4), "");
+	LLVMValueRef load = build_indexed_load(si_shader_ctx, base_ptr, dword_offset);
+	LLVMValueRef md = LLVMMDNodeInContext(base->gallivm->context, NULL, 0);
+	LLVMSetMetadata(load, md_invariant_load, md);
+	return load;
+#endif
 }
 
 static LLVMValueRef get_instance_index(
@@ -450,7 +461,7 @@ static LLVMValueRef fetch_constant(
 	addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16);
 	args[1] = lp_build_add(&bld_base->uint_bld, addr, args[1]);
 
-	result = build_constant_load(si_shader_ctx, args[0], args[1]);
+	result = build_load_constant(si_shader_ctx, args[0], args[1]);
 
 	return bitcast(bld_base, type, result);
 }
@@ -609,7 +620,7 @@ static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base,
 				args[1] = lp_build_const_int32(base->gallivm,
 							       ((reg_index * 4 + chan) * 4 +
 								const_chan) * 4);
-				base_elt = build_constant_load(si_shader_ctx, args[0], args[1]);
+				base_elt = build_load_constant(si_shader_ctx, args[0], args[1]);
 				args[5 + chan] =
 					lp_build_add(base, args[5 + chan],
 						     lp_build_mul(base, base_elt,
@@ -1215,8 +1226,16 @@ static void create_function(struct si_shader_context *si_shader_ctx)
 	v2i32 = LLVMVectorType(i32, 2);
 	v3i32 = LLVMVectorType(i32, 3);
 
-	params[SI_PARAM_CONST] = LLVMPointerType(LLVMVectorType(i8, 16), CONST_ADDR_SPACE);
+#if HAVE_LLVM <= 0x0303
+	params[SI_PARAM_CONST] = LLVMPointerType(LLVMVectorType(i8, 16),
+							CONST_ADDR_SPACE);
 	params[SI_PARAM_SAMPLER] = params[SI_PARAM_CONST];
+#else
+	params[SI_PARAM_CONST] = LLVMPointerType(LLVMPointerType(f32,
+				 CONST_ADDR_SPACE), CONST_ADDR_SPACE);
+	params[SI_PARAM_SAMPLER] = LLVMPointerType(LLVMVectorType(i8, 16),
+							CONST_ADDR_SPACE);
+#endif
 	params[SI_PARAM_RESOURCE] = LLVMPointerType(LLVMVectorType(i8, 32), CONST_ADDR_SPACE);
 
 	if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
@@ -1298,7 +1317,7 @@ static void preload_constants(struct si_shader_context *si_shader_ctx)
 			si_shader_ctx->const_resource,
 			lp_build_const_int32(gallivm, i * 4)
 		};
-		si_shader_ctx->constants[i] = build_constant_load(si_shader_ctx,
+		si_shader_ctx->constants[i] = build_load_constant(si_shader_ctx,
 							args[0], args[1]);
 	}
 }
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 29d960d..efbee0d 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -494,6 +494,7 @@ static void si_constant_buffer_update(struct r600_context *rctx)
 				si_pm4_sh_data_add(pm4, va);
 				si_pm4_sh_data_add(pm4, (S_008F04_BASE_ADDRESS_HI(va >> 32) |
 							 S_008F04_STRIDE(0)));
+#if HAVE_LLVM <= 0x0303
 				si_pm4_sh_data_add(pm4, cb->buffer_size);
 				si_pm4_sh_data_add(pm4, S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
 						   S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
@@ -501,12 +502,15 @@ static void si_constant_buffer_update(struct r600_context *rctx)
 						   S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
 						   S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
 						   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32));
+#endif
 			} else {
 				/* Fill in an empty T# buffer resource description */
 				si_pm4_sh_data_add(pm4, 0);
 				si_pm4_sh_data_add(pm4, 0);
+#if HAVE_LLVM <= 0x0303
 				si_pm4_sh_data_add(pm4, 0);
 				si_pm4_sh_data_add(pm4, 0);
+#endif
 			}
 		}
 
-- 
1.8.1.5



More information about the mesa-dev mailing list