Mesa (master): radeonsi/gfx9: declare LDS ESGS ring as an explicit symbol on LLVM >= 9

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Jun 13 00:40:14 UTC 2019


Module: Mesa
Branch: master
Commit: b519ddc35cf9098ad1541457a3d1c34a8ec99961
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b519ddc35cf9098ad1541457a3d1c34a8ec99961

Author: Nicolai Hähnle <nicolai.haehnle at amd.com>
Date:   Sat May  4 12:11:08 2019 +0200

radeonsi/gfx9: declare LDS ESGS ring as an explicit symbol on LLVM >= 9

This will make it easier to use LDS for other purposes in geometry
shaders in the future.

The lifetime of the esgs_ring variable is as follows:
- declared as [0 x i32] while compiling shader parts or monolithic shaders
- just before uploading, gfx9_get_gs_info computes (among other things)
  the final ESGS ring size (this depends on both the ES and the GS shader)
- during upload, the "esgs_ring" symbol is passed to ac_rtld as a shared
  LDS symbol, so that the LDS is laid out correctly, including any
  other LDS objects that may be defined in the future
- si_shader_gs uses shader->config.lds_size as the LDS size

This change depends on the LLVM changes for emitting LDS symbols into
the ELF file.

Reviewed-by: Marek Olšák <marek.olsak at amd.com>

---

 src/gallium/drivers/radeonsi/si_shader.c        | 82 ++++++++++++++++++++-----
 src/gallium/drivers/radeonsi/si_shader.h        | 19 ++++++
 src/gallium/drivers/radeonsi/si_state_shaders.c | 29 +++------
 3 files changed, 94 insertions(+), 36 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index ab97cd87273..85c2acc2195 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1543,9 +1543,22 @@ LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
 			return NULL;
 		}
 
+		unsigned offset = param * 4 + swizzle;
 		vtx_offset = LLVMBuildAdd(ctx->ac.builder, vtx_offset,
-					  LLVMConstInt(ctx->i32, param * 4, 0), "");
-		return lds_load(bld_base, type, swizzle, vtx_offset);
+					  LLVMConstInt(ctx->i32, offset, false), "");
+
+		LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->esgs_ring, vtx_offset);
+		LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+		if (llvm_type_is_64bit(ctx, type)) {
+			ptr = LLVMBuildGEP(ctx->ac.builder, ptr,
+					   &ctx->ac.i32_1, 1, "");
+			LLVMValueRef values[2] = {
+				value,
+				LLVMBuildLoad(ctx->ac.builder, ptr, "")
+			};
+			value = ac_build_gather_values(&ctx->ac, values, 2);
+		}
+		return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
 	}
 
 	/* GFX6: input load from the ESGS ring in memory. */
@@ -3513,7 +3526,9 @@ static void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi,
 
 			/* GFX9 has the ESGS ring in LDS. */
 			if (ctx->screen->info.chip_class >= GFX9) {
-				lds_store(ctx, param * 4 + chan, lds_base, out_val);
+				LLVMValueRef idx = LLVMConstInt(ctx->i32, param * 4 + chan, false);
+				idx = LLVMBuildAdd(ctx->ac.builder, lds_base, idx, "");
+				ac_build_indexed_store(&ctx->ac, ctx->esgs_ring, idx, out_val);
 				continue;
 			}
 
@@ -4911,10 +4926,7 @@ static void create_function(struct si_shader_context *ctx)
 	assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
 	shader->info.num_input_vgprs -= num_prolog_vgprs;
 
-	if (shader->key.as_ls ||
-	    ctx->type == PIPE_SHADER_TESS_CTRL ||
-	    /* GFX9 has the ESGS ring buffer in LDS. */
-	    type == SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY)
+	if (shader->key.as_ls || ctx->type == PIPE_SHADER_TESS_CTRL)
 		ac_declare_lds_as_pointer(&ctx->ac);
 }
 
@@ -4929,15 +4941,33 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
 	LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
 					    ctx->param_rw_buffers);
 
-	if (ctx->screen->info.chip_class <= GFX8 &&
-	    (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY)) {
-		unsigned ring =
-			ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS
-							     : SI_ES_RING_ESGS;
-		LLVMValueRef offset = LLVMConstInt(ctx->i32, ring, 0);
+	if (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY) {
+		if (ctx->screen->info.chip_class <= GFX8) {
+			unsigned ring =
+				ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS
+								  : SI_ES_RING_ESGS;
+			LLVMValueRef offset = LLVMConstInt(ctx->i32, ring, 0);
 
-		ctx->esgs_ring =
-			ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
+			ctx->esgs_ring =
+				ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
+		} else {
+			if (USE_LDS_SYMBOLS && HAVE_LLVM >= 0x0900) {
+				/* Declare the ESGS ring as an explicit LDS symbol.
+				 * For monolithic shaders, we declare the ring only once.
+				 *
+				 * We declare it with 64KB alignment as a hint that the
+				 * pointer value will always be 0.
+				 */
+				ctx->esgs_ring = LLVMAddGlobalInAddressSpace(
+					ctx->ac.module, LLVMArrayType(ctx->i32, 0),
+					"esgs_ring",
+					AC_ADDR_SPACE_LDS);
+				LLVMSetAlignment(ctx->esgs_ring, 64 * 1024);
+			} else {
+				ac_declare_lds_as_pointer(&ctx->ac);
+				ctx->esgs_ring = ctx->ac.lds;
+			}
+		}
 	}
 
 	if (ctx->shader->is_gs_copy_shader) {
@@ -5055,6 +5085,7 @@ static bool si_shader_binary_open(struct si_screen *screen,
 				  struct si_shader *shader,
 				  struct ac_rtld_binary *rtld)
 {
+	const struct si_shader_selector *sel = shader->selector;
 	const char *part_elfs[5];
 	size_t part_sizes[5];
 	unsigned num_parts = 0;
@@ -5074,11 +5105,27 @@ static bool si_shader_binary_open(struct si_screen *screen,
 
 #undef add_part
 
+	struct ac_rtld_symbol lds_symbols[1];
+	unsigned num_lds_symbols = 0;
+
+	if (sel && screen->info.chip_class >= GFX9 &&
+	    sel->type == PIPE_SHADER_GEOMETRY && !shader->is_gs_copy_shader) {
+		/* We add this symbol even on LLVM <= 8 to ensure that
+		 * shader->config.lds_size is set correctly below.
+		 */
+		struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
+		sym->name = "esgs_ring";
+		sym->size = shader->gs_info.esgs_ring_size;
+		sym->align = 64 * 1024;
+	}
+
 	bool ok = ac_rtld_open(rtld, (struct ac_rtld_open_info){
 			.info = &screen->info,
 			.num_parts = num_parts,
 			.elf_ptrs = part_elfs,
-			.elf_sizes = part_sizes });
+			.elf_sizes = part_sizes,
+			.num_shared_lds_symbols = num_lds_symbols,
+			.shared_lds_symbols = lds_symbols });
 
 	if (rtld->lds_size > 0) {
 		unsigned alloc_granularity = screen->info.chip_class >= GFX7 ? 512 : 256;
@@ -8013,6 +8060,9 @@ bool si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compil
 		si_calculate_max_simd_waves(shader);
 	}
 
+	if (sscreen->info.chip_class >= GFX9 && sel->type == PIPE_SHADER_GEOMETRY)
+		gfx9_get_gs_info(shader->previous_stage_sel, sel, &shader->gs_info);
+
 	si_fix_resource_usage(sscreen, shader);
 	si_shader_dump(sscreen, shader, debug, sel->info.processor,
 		       stderr, true);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index beda82beb05..f9a754d7f0b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -143,6 +143,10 @@
 
 #include <stdio.h>
 
+// Use LDS symbols when supported by LLVM. Can be disabled for testing the old
+// path on newer LLVM for now. Should be removed in the long term.
+#define USE_LDS_SYMBOLS (true)
+
 struct nir_shader;
 struct si_shader;
 struct si_context;
@@ -595,6 +599,14 @@ struct si_shader_binary {
 	char *llvm_ir_string;
 };
 
+struct gfx9_gs_info {
+	unsigned es_verts_per_subgroup;
+	unsigned gs_prims_per_subgroup;
+	unsigned gs_inst_prims_in_subgroup;
+	unsigned max_prims_per_subgroup;
+	unsigned esgs_ring_size; /* in bytes */
+};
+
 struct si_shader {
 	struct si_compiler_ctx_state	compiler_ctx_state;
 
@@ -629,6 +641,8 @@ struct si_shader {
 	char				*shader_log;
 	size_t				shader_log_size;
 
+	struct gfx9_gs_info gs_info;
+
 	/* For save precompute context registers values. */
 	union {
 		struct {
@@ -718,6 +732,11 @@ void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
 void si_lower_nir(struct si_shader_selector *sel);
 void si_nir_opts(struct nir_shader *nir);
 
+/* si_state_shaders.c */
+void gfx9_get_gs_info(struct si_shader_selector *es,
+		      struct si_shader_selector *gs,
+		      struct gfx9_gs_info *out);
+
 /* Inline helpers. */
 
 /* Return the pointer to the main shader part's pointer. */
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index e90884c898a..fab2e255742 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -646,17 +646,9 @@ static unsigned si_conv_prim_to_gs_out(unsigned mode)
 	return prim_conv[mode];
 }
 
-struct gfx9_gs_info {
-	unsigned es_verts_per_subgroup;
-	unsigned gs_prims_per_subgroup;
-	unsigned gs_inst_prims_in_subgroup;
-	unsigned max_prims_per_subgroup;
-	unsigned lds_size;
-};
-
-static void gfx9_get_gs_info(struct si_shader_selector *es,
-				   struct si_shader_selector *gs,
-				   struct gfx9_gs_info *out)
+void gfx9_get_gs_info(struct si_shader_selector *es,
+		      struct si_shader_selector *gs,
+		      struct gfx9_gs_info *out)
 {
 	unsigned gs_num_invocations = MAX2(gs->gs_num_invocations, 1);
 	unsigned input_prim = gs->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM];
@@ -747,7 +739,7 @@ static void gfx9_get_gs_info(struct si_shader_selector *es,
 	out->gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations;
 	out->max_prims_per_subgroup = out->gs_inst_prims_in_subgroup *
 				      gs->gs_max_out_vertices;
-	out->lds_size = align(esgs_lds_size, 128) / 128;
+	out->esgs_ring_size = 4 * esgs_lds_size;
 
 	assert(out->max_prims_per_subgroup <= max_out_prims);
 }
@@ -876,7 +868,6 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
 		unsigned input_prim = sel->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM];
 		unsigned es_type = shader->key.part.gs.es->type;
 		unsigned es_vgpr_comp_cnt, gs_vgpr_comp_cnt;
-		struct gfx9_gs_info gs_info;
 
 		if (es_type == PIPE_SHADER_VERTEX)
 			/* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
@@ -904,8 +895,6 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
 		else
 			num_user_sgprs = GFX9_TESGS_NUM_USER_SGPR;
 
-		gfx9_get_gs_info(shader->key.part.gs.es, sel, &gs_info);
-
 		si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
 		si_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES, S_00B214_MEM_BASE(va >> 40));
 
@@ -920,15 +909,15 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
 			       S_00B22C_USER_SGPR_MSB(num_user_sgprs >> 5) |
 			       S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
 			       S_00B22C_OC_LDS_EN(es_type == PIPE_SHADER_TESS_EVAL) |
-			       S_00B22C_LDS_SIZE(gs_info.lds_size) |
+			       S_00B22C_LDS_SIZE(shader->config.lds_size) |
 			       S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
 
 		shader->ctx_reg.gs.vgt_gs_onchip_cntl =
-			S_028A44_ES_VERTS_PER_SUBGRP(gs_info.es_verts_per_subgroup) |
-			S_028A44_GS_PRIMS_PER_SUBGRP(gs_info.gs_prims_per_subgroup) |
-			S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_info.gs_inst_prims_in_subgroup);
+			S_028A44_ES_VERTS_PER_SUBGRP(shader->gs_info.es_verts_per_subgroup) |
+			S_028A44_GS_PRIMS_PER_SUBGRP(shader->gs_info.gs_prims_per_subgroup) |
+			S_028A44_GS_INST_PRIMS_IN_SUBGRP(shader->gs_info.gs_inst_prims_in_subgroup);
 		shader->ctx_reg.gs.vgt_gs_max_prims_per_subgroup =
-			S_028A94_MAX_PRIMS_PER_SUBGROUP(gs_info.max_prims_per_subgroup);
+			S_028A94_MAX_PRIMS_PER_SUBGROUP(shader->gs_info.max_prims_per_subgroup);
 		shader->ctx_reg.gs.vgt_esgs_ring_itemsize =
 			shader->key.part.gs.es->esgs_itemsize / 4;
 




More information about the mesa-commit mailing list