Mesa (master): radv: add support for 32-bit pointers in user data SGPRs

Samuel Pitoiset hakzsam at kemper.freedesktop.org
Tue May 22 13:53:17 UTC 2018


Module: Mesa
Branch: master
Commit: 36a4d6d08164344cbb4766944b3c45b2b223cf22
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=36a4d6d08164344cbb4766944b3c45b2b223cf22

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Wed May 16 17:40:47 2018 +0200

radv: add support for 32-bit pointers in user data SGPRs

We still use 64-bit GPU pointers for all ring buffers because
llvm.amdgcn.implicit.buffer.ptr doesn't seem to support 32-bit
GPU pointers for now. This can be improved later anyway.

Vega10:
Totals from affected shaders:
SGPRS: 1008722 -> 1026710 (1.78 %)
VGPRS: 706580 -> 707136 (0.08 %)
Spilled SGPRs: 22555 -> 22209 (-1.53 %)
Spilled VGPRs: 75 -> 75 (0.00 %)
Code Size: 34819208 -> 35202140 (1.10 %) bytes
Max Waves: 175423 -> 175086 (-0.19 %)

Polaris10:
Totals from affected shaders:
SGPRS: 1029849 -> 1036517 (0.65 %)
VGPRS: 709984 -> 708872 (-0.16 %)
Spilled SGPRs: 22672 -> 22309 (-1.60 %)
Spilled VGPRs: 82 -> 66 (-19.51 %)
Scratch size: 76 -> 60 (-21.05 %) dwords per thread
Code Size: 34915336 -> 35309752 (1.13 %) bytes
Max Waves: 151221 -> 151677 (0.30 %)

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>

---

 src/amd/vulkan/radv_cmd_buffer.c  | 13 +++++++------
 src/amd/vulkan/radv_device.c      |  6 ++++--
 src/amd/vulkan/radv_nir_to_llvm.c | 24 +++++++++++++++---------
 src/amd/vulkan/radv_private.h     | 18 ++++++++++++++----
 4 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 3636b2c8d9..5ab577b4c5 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -586,11 +586,12 @@ radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer,
 	uint32_t base_reg = pipeline->user_data_0[stage];
 	if (loc->sgpr_idx == -1)
 		return;
-	assert(loc->num_sgprs == 2);
+
+	assert(loc->num_sgprs == (HAVE_32BIT_POINTERS ? 1 : 2));
 	assert(!loc->indirect);
 
-	radv_emit_shader_pointer(cmd_buffer->cs,
-				 base_reg + loc->sgpr_idx * 4, va);
+	radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
+				 base_reg + loc->sgpr_idx * 4, va, false);
 }
 
 static void
@@ -1442,10 +1443,10 @@ emit_stage_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer,
 		return;
 
 	assert(!desc_set_loc->indirect);
-	assert(desc_set_loc->num_sgprs == 2);
+	assert(desc_set_loc->num_sgprs == (HAVE_32BIT_POINTERS ? 1 : 2));
 
-	radv_emit_shader_pointer(cmd_buffer->cs,
-				 base_reg + desc_set_loc->sgpr_idx * 4, va);
+	radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
+				 base_reg + desc_set_loc->sgpr_idx * 4, va, false);
 }
 
 static void
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index d6abab338e..61b4fba23f 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1963,7 +1963,8 @@ radv_emit_global_shader_pointers(struct radv_queue *queue,
 				   R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
 
 		for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
-			radv_emit_shader_pointer(cs, regs[i], va);
+			radv_emit_shader_pointer(queue->device, cs, regs[i],
+						 va, true);
 		}
 	} else {
 		uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
@@ -1974,7 +1975,8 @@ radv_emit_global_shader_pointers(struct radv_queue *queue,
 				   R_00B530_SPI_SHADER_USER_DATA_LS_0};
 
 		for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
-			radv_emit_shader_pointer(cs, regs[i], va);
+			radv_emit_shader_pointer(queue->device, cs, regs[i],
+						 va, true);
 		}
 	}
 }
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 9e42983cd1..c2cc5038b8 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -569,7 +569,10 @@ set_loc_shader(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx,
 static void
 set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx)
 {
-	set_loc_shader(ctx, idx, sgpr_idx, 2);
+	bool use_32bit_pointers = HAVE_32BIT_POINTERS &&
+				  idx != AC_UD_SCRATCH_RING_OFFSETS;
+
+	set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
 }
 
 static void
@@ -580,7 +583,7 @@ set_loc_desc(struct radv_shader_context *ctx, int idx,  uint8_t *sgpr_idx,
 		&ctx->shader_info->user_sgprs_locs.descriptor_sets[idx];
 	assert(ud_info);
 
-	set_loc(ud_info, sgpr_idx, 2, indirect_offset);
+	set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, indirect_offset);
 }
 
 struct user_sgpr_info {
@@ -618,7 +621,8 @@ count_vs_user_sgprs(struct radv_shader_context *ctx)
 {
 	uint8_t count = 0;
 
-	count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0;
+	if (ctx->shader_info->info.vs.has_vertex_buffers)
+		count += HAVE_32BIT_POINTERS ? 1 : 2;
 	count += ctx->shader_info->info.vs.needs_draw_id ? 3 : 2;
 
 	return count;
@@ -685,13 +689,13 @@ static void allocate_user_sgprs(struct radv_shader_context *ctx,
 		user_sgpr_info->sgpr_count++;
 
 	if (ctx->shader_info->info.loads_push_constants)
-		user_sgpr_info->sgpr_count += 2;
+		user_sgpr_info->sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2;
 
 	uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16;
 	uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count;
 
 	if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) {
-		user_sgpr_info->sgpr_count += 2;
+		user_sgpr_info->sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2;
 		user_sgpr_info->indirect_all_descriptor_sets = true;
 	} else {
 		user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2;
@@ -707,7 +711,7 @@ declare_global_input_sgprs(struct radv_shader_context *ctx,
 			   struct arg_info *args,
 			   LLVMValueRef *desc_sets)
 {
-	LLVMTypeRef type = ac_array_in_const_addr_space(ctx->ac.i8);
+	LLVMTypeRef type = ac_array_in_const32_addr_space(ctx->ac.i8);
 	unsigned num_sets = ctx->options->layout ?
 			    ctx->options->layout->num_sets : 0;
 	unsigned stage_mask = 1 << stage;
@@ -725,7 +729,7 @@ declare_global_input_sgprs(struct radv_shader_context *ctx,
 			}
 		}
 	} else {
-		add_array_arg(args, ac_array_in_const_addr_space(type), desc_sets);
+		add_array_arg(args, ac_array_in_const32_addr_space(type), desc_sets);
 	}
 
 	if (ctx->shader_info->info.loads_push_constants) {
@@ -745,7 +749,8 @@ declare_vs_specific_input_sgprs(struct radv_shader_context *ctx,
 	    (stage == MESA_SHADER_VERTEX ||
 	     (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
 		if (ctx->shader_info->info.vs.has_vertex_buffers) {
-			add_arg(args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32),
+			add_arg(args, ARG_SGPR,
+				ac_array_in_const32_addr_space(ctx->ac.v4i32),
 				&ctx->vertex_buffers);
 		}
 		add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex);
@@ -1878,7 +1883,8 @@ static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi,
 	index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
 
 	list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0));
-	list = LLVMBuildPointerCast(builder, list, ac_array_in_const_addr_space(type), "");
+	list = LLVMBuildPointerCast(builder, list,
+				    ac_array_in_const32_addr_space(type), "");
 
 	return ac_build_load_to_sgpr(&ctx->ac, list, index);
 }
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index adfd75c2a8..e2fa58d8d1 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -57,6 +57,7 @@
 #include "ac_nir_to_llvm.h"
 #include "ac_gpu_info.h"
 #include "ac_surface.h"
+#include "ac_llvm_build.h"
 #include "radv_descriptor_set.h"
 #include "radv_extensions.h"
 #include "radv_cs.h"
@@ -1130,12 +1131,21 @@ bool radv_get_memory_fd(struct radv_device *device,
 			int *pFD);
 
 static inline void
-radv_emit_shader_pointer(struct radeon_winsys_cs *cs,
-			 uint32_t sh_offset, uint64_t va)
+radv_emit_shader_pointer(struct radv_device *device,
+			 struct radeon_winsys_cs *cs,
+			 uint32_t sh_offset, uint64_t va, bool global)
 {
-	radeon_set_sh_reg_seq(cs, sh_offset, 2);
+	bool use_32bit_pointers = HAVE_32BIT_POINTERS && !global;
+
+	radeon_set_sh_reg_seq(cs, sh_offset, use_32bit_pointers ? 1 : 2);
 	radeon_emit(cs, va);
-	radeon_emit(cs, va >> 32);
+
+	if (use_32bit_pointers) {
+		assert(va == 0 ||
+		       (va >> 32) == device->physical_device->rad_info.address32_hi);
+	} else {
+		radeon_emit(cs, va >> 32);
+	}
 }
 
 static inline struct radv_descriptor_state *




More information about the mesa-commit mailing list