[Mesa-dev] [PATCH 14/15] ac: add si_nir_load_input_gs() to the abi

Timothy Arceri tarceri at itsqueeze.com
Wed Nov 22 10:30:01 UTC 2017


V2: make use of driver_location and don't expose NIR to the ABI.
---
 src/amd/common/ac_nir_to_llvm.c                   | 39 +++++++++++++++--------
 src/amd/common/ac_shader_abi.h                    |  9 ++++++
 src/gallium/drivers/radeonsi/si_shader.c          |  1 +
 src/gallium/drivers/radeonsi/si_shader_internal.h |  9 ++++++
 src/gallium/drivers/radeonsi/si_shader_nir.c      | 20 ++++++++++++
 5 files changed, 64 insertions(+), 14 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 2c30652288..6dc74409a8 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2875,41 +2875,43 @@ load_tes_input(struct nir_to_llvm_context *ctx,
 	buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, "");
 
 	result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL,
 				      buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false);
 	result = trim_vector(&ctx->ac, result, instr->num_components);
 	result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), "");
 	return result;
 }
 
 static LLVMValueRef
-load_gs_input(struct nir_to_llvm_context *ctx,
-	      nir_intrinsic_instr *instr)
+load_gs_input(struct ac_shader_abi *abi,
+	      unsigned location,
+	      unsigned driver_location,
+	      unsigned component,
+	      unsigned num_components,
+	      unsigned vertex_index,
+	      unsigned const_index,
+	      LLVMTypeRef type)
 {
-	LLVMValueRef indir_index, vtx_offset;
-	unsigned const_index;
+	struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
+	LLVMValueRef vtx_offset;
 	LLVMValueRef args[9];
 	unsigned param, vtx_offset_param;
 	LLVMValueRef value[4], result;
-	unsigned vertex_index;
-	get_deref_offset(ctx->nir, instr->variables[0],
-			 false, &vertex_index, NULL,
-			 &const_index, &indir_index);
+
 	vtx_offset_param = vertex_index;
 	assert(vtx_offset_param < 6);
 	vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param],
 				  LLVMConstInt(ctx->ac.i32, 4, false), "");
 
-	param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
+	param = shader_io_get_unique_index(location);
 
-	unsigned comp = instr->variables[0]->var->data.location_frac;
-	for (unsigned i = comp; i < instr->num_components + comp; i++) {
+	for (unsigned i = component; i < num_components + component; i++) {
 		if (ctx->ac.chip_class >= GFX9) {
 			LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param];
 			dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
 			                       LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), "");
 			value[i] = ac_lds_load(&ctx->ac, dw_addr);
 		} else {
 			args[0] = ctx->esgs_ring;
 			args[1] = vtx_offset;
 			args[2] = LLVMConstInt(ctx->ac.i32, (param * 4 + i + const_index) * 256, false);
 			args[3] = ctx->ac.i32_0;
@@ -2918,21 +2920,21 @@ load_gs_input(struct nir_to_llvm_context *ctx,
 			args[6] = ctx->ac.i32_1; /* GLC */
 			args[7] = ctx->ac.i32_0; /* SLC */
 			args[8] = ctx->ac.i32_0; /* TFE */
 
 			value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
 			                              ctx->ac.i32, args, 9,
 			                              AC_FUNC_ATTR_READONLY |
 			                              AC_FUNC_ATTR_LEGACY);
 		}
 	}
-	result = ac_build_varying_gather_values(&ctx->ac, value, instr->num_components, comp);
+	result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
 
 	return result;
 }
 
 static LLVMValueRef
 build_gep_for_deref(struct ac_nir_context *ctx,
 		    nir_deref_var *deref)
 {
 	struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var);
 	assert(entry->data);
@@ -2987,21 +2989,30 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 	if (instr->dest.ssa.bit_size == 64)
 		ve *= 2;
 
 	switch (instr->variables[0]->var->data.mode) {
 	case nir_var_shader_in:
 		if (ctx->stage == MESA_SHADER_TESS_CTRL)
 			return load_tcs_input(ctx->nctx, instr);
 		if (ctx->stage == MESA_SHADER_TESS_EVAL)
 			return load_tes_input(ctx->nctx, instr);
 		if (ctx->stage == MESA_SHADER_GEOMETRY) {
-			return load_gs_input(ctx->nctx, instr);
+				LLVMValueRef indir_index;
+				unsigned const_index, vertex_index;
+				get_deref_offset(ctx, instr->variables[0],
+						 false, &vertex_index, NULL,
+						 &const_index, &indir_index);
+			return ctx->abi->load_inputs(ctx->abi, instr->variables[0]->var->data.location,
+						     instr->variables[0]->var->data.driver_location,
+						     instr->variables[0]->var->data.location_frac, ve,
+						     vertex_index, const_index,
+						     nir2llvmtype(ctx, instr->variables[0]->var->type));
 		}
 
 		for (unsigned chan = comp; chan < ve + comp; chan++) {
 			if (indir_index) {
 				unsigned count = glsl_count_attribute_slots(
 						instr->variables[0]->var->type,
 						ctx->stage == MESA_SHADER_VERTEX);
 				count -= chan / 4;
 				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
 						&ctx->ac, ctx->abi->inputs + idx + chan, count,
@@ -6541,22 +6552,22 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
 
 	for(int i = 0; i < shader_count; ++i) {
 		ctx.stage = shaders[i]->info.stage;
 		ctx.output_mask = 0;
 		ctx.tess_outputs_written = 0;
 		ctx.num_output_clips = shaders[i]->info.clip_distance_array_size;
 		ctx.num_output_culls = shaders[i]->info.cull_distance_array_size;
 
 		if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
 			ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, ctx.ac.i32, "gs_next_vertex");
-
 			ctx.gs_max_out_vertices = shaders[i]->info.gs.vertices_out;
+			ctx.abi.load_inputs = load_gs_input;
 		} else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
 			ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode;
 		} else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
 			if (shader_info->info.vs.needs_instance_id) {
 				ctx.shader_info->vs.vgpr_comp_cnt =
 					MAX2(3, ctx.shader_info->vs.vgpr_comp_cnt);
 			}
 		} else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) {
 			shader_info->fs.can_discard = shaders[i]->info.fs.uses_discard;
 		}
diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
index 56209bd917..68fc431d42 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -57,20 +57,29 @@ struct ac_shader_abi {
 	LLVMValueRef *inputs;
 
 	void (*emit_outputs)(struct ac_shader_abi *abi,
 			     unsigned max_outputs,
 			     LLVMValueRef *addrs);
 
 	void (*emit_vertex)(struct ac_shader_abi *abi,
 			    unsigned stream,
 			    LLVMValueRef *addrs);
 
+	LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
+				    unsigned location,
+				    unsigned driver_location,
+				    unsigned component,
+				    unsigned num_components,
+				    unsigned vertex_index,
+				    unsigned const_index,
+				    LLVMTypeRef type);
+
 	LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
 
 	/**
 	 * Load the descriptor for the given buffer.
 	 *
 	 * \param buffer the buffer as presented in NIR: this is the descriptor
 	 *               in Vulkan, and the buffer index in OpenGL/Gallium
 	 * \param write whether buffer contents will be written
 	 */
 	LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c2338089b3..177ae1a247 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5794,20 +5794,21 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 	case PIPE_SHADER_TESS_EVAL:
 		bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
 		if (shader->key.as_es)
 			ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
 		else
 			ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
 		bld_base->emit_epilogue = si_tgsi_emit_epilogue;
 		break;
 	case PIPE_SHADER_GEOMETRY:
 		bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
+		ctx->abi.load_inputs = si_nir_load_input_gs;
 		ctx->abi.emit_vertex = si_llvm_emit_vertex;
 		ctx->abi.emit_outputs = si_llvm_emit_gs_epilogue;
 		bld_base->emit_epilogue = si_tgsi_emit_gs_epilogue;
 		break;
 	case PIPE_SHADER_FRAGMENT:
 		ctx->load_input = declare_input_fs;
 		ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
 		bld_base->emit_epilogue = si_tgsi_emit_epilogue;
 		break;
 	case PIPE_SHADER_COMPUTE:
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 03cf8fa6d9..58617c58e4 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -327,11 +327,20 @@ void si_llvm_load_input_vs(
 	struct si_shader_context *ctx,
 	unsigned input_index,
 	LLVMValueRef out[4]);
 void si_llvm_load_input_fs(
 	struct si_shader_context *ctx,
 	unsigned input_index,
 	LLVMValueRef out[4]);
 
 bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir);
 
+LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
+				  unsigned location,
+				  unsigned driver_location,
+				  unsigned component,
+				  unsigned num_components,
+				  unsigned vertex_index,
+				  unsigned const_index,
+				  LLVMTypeRef type);
+
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 2e0e3725f7..54f79ba0c3 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -481,20 +481,40 @@ static void declare_nir_input_fs(struct si_shader_context *ctx,
 		out[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT);
 		out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT);
 		out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
 				LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT));
 		return;
 	}
 
 	si_llvm_load_input_fs(ctx, input_index, out);
 }
 
+LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
+				  unsigned location, /* not read in this body */
+				  unsigned driver_location,
+				  unsigned component,
+				  unsigned num_components,
+				  unsigned vertex_index,
+				  unsigned const_index, /* not read in this body */
+				  LLVMTypeRef type)
+{
+	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+	/* Load channels [component, component + num_components) one by one, then gather them into the returned value. */
+	LLVMValueRef value[4]; /* indexed by absolute channel, so component + num_components must be <= 4 */
+	for (unsigned i = component; i < num_components + component; i++) {
+		value[i] = si_llvm_load_input_gs(&ctx->abi, driver_location  / 4, /* NOTE(review): presumably turns a per-channel location into an input index - confirm */
+						 vertex_index, type, i);
+	}
+
+	return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
+}
+
 static LLVMValueRef
 si_nir_load_sampler_desc(struct ac_shader_abi *abi,
 		         unsigned descriptor_set, unsigned base_index,
 		         unsigned constant_index, LLVMValueRef dynamic_index,
 		         enum ac_descriptor_type desc_type, bool image,
 			 bool write)
 {
 	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
 	LLVMBuilderRef builder = ctx->ac.builder;
 	LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images);
-- 
2.14.3



More information about the mesa-dev mailing list