[Mesa-dev] [PATCH 5/7] radeonsi: add TGSI_SEMANTIC_CS_USER_DATA for reading up to 4 SGPRs with TGSI

Marek Olšák maraeo at gmail.com
Tue Aug 21 05:50:34 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_compute.c        | 16 +++++++++++++---
 src/gallium/drivers/radeonsi/si_compute.h        |  1 +
 src/gallium/drivers/radeonsi/si_pipe.h           |  1 +
 src/gallium/drivers/radeonsi/si_shader.c         | 11 +++++++++++
 src/gallium/drivers/radeonsi/si_shader.h         |  7 +++++++
 .../drivers/radeonsi/si_shader_internal.h        |  1 +
 6 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index ea6fa3e999d..c5d3d5fcf02 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -121,20 +121,22 @@ static void si_create_compute_state_async(void *job, int thread_index)
 				 &program->active_samplers_and_images);
 
 	program->shader.selector = &sel;
 	program->shader.is_monolithic = true;
 	program->uses_grid_size = sel.info.uses_grid_size;
 	program->uses_bindless_samplers = sel.info.uses_bindless_samplers;
 	program->uses_bindless_images = sel.info.uses_bindless_images;
 	program->reads_variable_block_size =
 		sel.info.uses_block_size &&
 		sel.info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
+	program->num_cs_user_data_dwords =
+		sel.info.properties[TGSI_PROPERTY_CS_USER_DATA_DWORDS];
 
 	void *ir_binary = si_get_ir_binary(&sel);
 
 	/* Try to load the shader from the shader cache. */
 	mtx_lock(&sscreen->shader_cache_mutex);
 
 	if (ir_binary &&
 	    si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
 		mtx_unlock(&sscreen->shader_cache_mutex);
 
@@ -152,21 +154,22 @@ static void si_create_compute_state_async(void *job, int thread_index)
 
 			if (program->ir_type == PIPE_SHADER_IR_TGSI)
 				FREE(program->ir.tgsi);
 			program->shader.selector = NULL;
 			return;
 		}
 
 		bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0;
 		unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS +
 				      (sel.info.uses_grid_size ? 3 : 0) +
-				      (program->reads_variable_block_size ? 3 : 0);
+				      (program->reads_variable_block_size ? 3 : 0) +
+				      program->num_cs_user_data_dwords;
 
 		shader->config.rsrc1 =
 			S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) |
 			S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8) |
 			S_00B848_DX10_CLAMP(1) |
 			S_00B848_FLOAT_MODE(shader->config.float_mode);
 
 		shader->config.rsrc2 =
 			S_00B84C_USER_SGPR(user_sgprs) |
 			S_00B84C_SCRATCH_EN(scratch_enabled) |
@@ -699,30 +702,32 @@ static bool si_upload_compute_input(struct si_context *sctx,
 		radeon_emit(cs, kernel_args_va);
 		radeon_emit(cs, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) |
 		                S_008F04_STRIDE(0));
 	}
 
 	r600_resource_reference(&input_buffer, NULL);
 
 	return true;
 }
 
-static void si_setup_tgsi_grid(struct si_context *sctx,
+static void si_setup_tgsi_user_data(struct si_context *sctx,
                                 const struct pipe_grid_info *info)
 {
 	struct si_compute *program = sctx->cs_shader_state.program;
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 	unsigned grid_size_reg = R_00B900_COMPUTE_USER_DATA_0 +
 				 4 * SI_NUM_RESOURCE_SGPRS;
 	unsigned block_size_reg = grid_size_reg +
 				  /* 12 bytes = 3 dwords. */
 				  12 * program->uses_grid_size;
+	unsigned cs_user_data_reg = block_size_reg +
+				    12 * program->reads_variable_block_size;
 
 	if (info->indirect) {
 		if (program->uses_grid_size) {
 			uint64_t base_va = r600_resource(info->indirect)->gpu_address;
 			uint64_t va = base_va + info->indirect_offset;
 			int i;
 
 			radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 					 r600_resource(info->indirect),
 					 RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
@@ -744,20 +749,25 @@ static void si_setup_tgsi_grid(struct si_context *sctx,
 			radeon_emit(cs, info->grid[1]);
 			radeon_emit(cs, info->grid[2]);
 		}
 		if (program->reads_variable_block_size) {
 			radeon_set_sh_reg_seq(cs, block_size_reg, 3);
 			radeon_emit(cs, info->block[0]);
 			radeon_emit(cs, info->block[1]);
 			radeon_emit(cs, info->block[2]);
 		}
 	}
+
+	if (program->num_cs_user_data_dwords) {
+		radeon_set_sh_reg_seq(cs, cs_user_data_reg, program->num_cs_user_data_dwords);
+		radeon_emit_array(cs, sctx->cs_user_data, program->num_cs_user_data_dwords);
+	}
 }
 
 static void si_emit_dispatch_packets(struct si_context *sctx,
                                      const struct pipe_grid_info *info)
 {
 	struct si_screen *sscreen = sctx->screen;
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 	bool render_cond_bit = sctx->render_cond && !sctx->render_cond_force_off;
 	unsigned waves_per_threadgroup =
 		DIV_ROUND_UP(info->block[0] * info->block[1] * info->block[2], 64);
@@ -901,21 +911,21 @@ static void si_launch_grid(
 			r600_resource(program->global_buffers[i]);
 		if (!buffer) {
 			continue;
 		}
 		radeon_add_to_buffer_list(sctx, sctx->gfx_cs, buffer,
 					  RADEON_USAGE_READWRITE,
 					  RADEON_PRIO_COMPUTE_GLOBAL);
 	}
 
 	if (program->ir_type != PIPE_SHADER_IR_NATIVE)
-		si_setup_tgsi_grid(sctx, info);
+		si_setup_tgsi_user_data(sctx, info);
 
 	si_emit_dispatch_packets(sctx, info);
 
 	if (unlikely(sctx->current_saved_cs)) {
 		si_trace_emit(sctx);
 		si_log_compute_state(sctx, sctx->log);
 	}
 
 	sctx->compute_is_busy = true;
 	sctx->num_compute_calls++;
diff --git a/src/gallium/drivers/radeonsi/si_compute.h b/src/gallium/drivers/radeonsi/si_compute.h
index ef8b4aec4df..99b501673c5 100644
--- a/src/gallium/drivers/radeonsi/si_compute.h
+++ b/src/gallium/drivers/radeonsi/si_compute.h
@@ -50,20 +50,21 @@ struct si_compute {
 	unsigned private_size;
 	unsigned input_size;
 	struct si_shader shader;
 
 	struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
 	unsigned use_code_object_v2 : 1;
 	unsigned uses_grid_size:1;
 	unsigned uses_bindless_samplers:1;
 	unsigned uses_bindless_images:1;
 	bool reads_variable_block_size;
+	unsigned num_cs_user_data_dwords;
 };
 
 void si_destroy_compute(struct si_compute *program);
 
 static inline void
 si_compute_reference(struct si_compute **dst, struct si_compute *src)
 {
 	if (pipe_reference(&(*dst)->reference, &src->reference))
 		si_destroy_compute(*dst);
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 4c3f13b84e2..100d0166f62 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -847,20 +847,21 @@ struct si_context {
 	struct pipe_constant_buffer	null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
 	struct pipe_resource		*esgs_ring;
 	struct pipe_resource		*gsvs_ring;
 	struct pipe_resource		*tess_rings;
 	union pipe_color_union		*border_color_table; /* in CPU memory, any endian */
 	struct r600_resource		*border_color_buffer;
 	union pipe_color_union		*border_color_map; /* in VRAM (slow access), little endian */
 	unsigned			border_color_count;
 	unsigned			num_vs_blit_sgprs;
 	uint32_t			vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD];
+	uint32_t			cs_user_data[4];
 
 	/* Vertex and index buffers. */
 	bool				vertex_buffers_dirty;
 	bool				vertex_buffer_pointer_dirty;
 	struct pipe_vertex_buffer	vertex_buffer[SI_NUM_VERTEX_BUFFERS];
 
 	/* MSAA config state. */
 	int				ps_iter_samples;
 	bool				ps_uses_fbfetch;
 	bool				smoothing_enabled;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index cfd99b61601..c51e91b1d3d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2260,20 +2260,24 @@ void si_load_system_value(struct si_shader_context *ctx,
 		}
 		id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
 		value = LLVMBuildShl(ctx->ac.builder, value, id, "");
 		if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK ||
 		    decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LT_MASK)
 			value = LLVMBuildNot(ctx->ac.builder, value, "");
 		value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, "");
 		break;
 	}
 
+	case TGSI_SEMANTIC_CS_USER_DATA:
+		value = LLVMGetParam(ctx->main_fn, ctx->param_cs_user_data);
+		break;
+
 	default:
 		assert(!"unknown system value");
 		return;
 	}
 
 	ctx->system_values[index] = value;
 }
 
 void si_declare_compute_memory(struct si_shader_context *ctx)
 {
@@ -4941,20 +4945,27 @@ static void create_function(struct si_shader_context *ctx)
 
 	case PIPE_SHADER_COMPUTE:
 		declare_global_desc_pointers(ctx, &fninfo);
 		declare_per_stage_desc_pointers(ctx, &fninfo, true);
 		if (shader->selector->info.uses_grid_size)
 			add_arg_assign(&fninfo, ARG_SGPR, v3i32, &ctx->abi.num_work_groups);
 		if (shader->selector->info.uses_block_size &&
 		    shader->selector->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0)
 			ctx->param_block_size = add_arg(&fninfo, ARG_SGPR, v3i32);
 
+		unsigned cs_user_data_dwords =
+			shader->selector->info.properties[TGSI_PROPERTY_CS_USER_DATA_DWORDS];
+		if (cs_user_data_dwords) {
+			ctx->param_cs_user_data = add_arg(&fninfo, ARG_SGPR,
+							  LLVMVectorType(ctx->i32, cs_user_data_dwords));
+		}
+
 		for (i = 0; i < 3; i++) {
 			ctx->abi.workgroup_ids[i] = NULL;
 			if (shader->selector->info.uses_block_id[i])
 				add_arg_assign(&fninfo, ARG_SGPR, ctx->i32, &ctx->abi.workgroup_ids[i]);
 		}
 
 		add_arg_assign(&fninfo, ARG_VGPR, v3i32, &ctx->abi.local_invocation_ids);
 		break;
 	default:
 		assert(0 && "unimplemented shader");
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 2dc4bc7e787..14230b82073 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -269,28 +269,35 @@ enum {
 #define C_VS_STATE_CLAMP_VERTEX_COLOR		0xFFFFFFFE
 #define S_VS_STATE_INDEXED(x)			(((unsigned)(x) & 0x1) << 1)
 #define C_VS_STATE_INDEXED			0xFFFFFFFD
 #define S_VS_STATE_LS_OUT_PATCH_SIZE(x)		(((unsigned)(x) & 0x1FFF) << 8)
 #define C_VS_STATE_LS_OUT_PATCH_SIZE		0xFFE000FF
 #define S_VS_STATE_LS_OUT_VERTEX_SIZE(x)	(((unsigned)(x) & 0xFF) << 24)
 #define C_VS_STATE_LS_OUT_VERTEX_SIZE		0x00FFFFFF
 
 /* SI-specific system values. */
 enum {
+	/* Values from set_tess_state. */
 	TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT,
 	TGSI_SEMANTIC_DEFAULT_TESSINNER_SI,
+
+	/* Up to 4 dwords in user SGPRs for compute shaders. */
+	TGSI_SEMANTIC_CS_USER_DATA,
 };
 
 enum {
 	/* Use a property enum that CS wouldn't use. */
 	TGSI_PROPERTY_CS_LOCAL_SIZE = TGSI_PROPERTY_FS_COORD_ORIGIN,
 
+	/* The number of user data dwords read by the shader: 0 (unused) or [1, 4]. */
+	TGSI_PROPERTY_CS_USER_DATA_DWORDS = TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
+
 	/* Use a property enum that VS wouldn't use. */
 	TGSI_PROPERTY_VS_BLIT_SGPRS = TGSI_PROPERTY_FS_COORD_ORIGIN,
 
 	/* These represent the number of SGPRs the shader uses. */
 	SI_VS_BLIT_SGPRS_POS = 3,
 	SI_VS_BLIT_SGPRS_POS_COLOR = 7,
 	SI_VS_BLIT_SGPRS_POS_TEXCOORD = 9,
 };
 
 /* For VS shader key fix_fetch. */
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 6cc503690da..f187a06854d 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -166,20 +166,21 @@ struct si_shader_context {
 	int param_es2gs_offset;
 	/* API GS */
 	int param_gs2vs_offset;
 	int param_gs_wave_id; /* GFX6 */
 	LLVMValueRef gs_vtx_offset[6]; /* in dwords (GFX6) */
 	int param_gs_vtx01_offset; /* in dwords (GFX9) */
 	int param_gs_vtx23_offset; /* in dwords (GFX9) */
 	int param_gs_vtx45_offset; /* in dwords (GFX9) */
 	/* CS */
 	int param_block_size;
+	int param_cs_user_data;
 
 	struct ac_llvm_compiler *compiler;
 
 	/* Preloaded descriptors. */
 	LLVMValueRef esgs_ring;
 	LLVMValueRef gsvs_ring[4];
 	LLVMValueRef tess_offchip_ring;
 
 	LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */
 	LLVMValueRef gs_next_vertex[4];
-- 
2.17.1



More information about the mesa-dev mailing list