[Mesa-dev] [PATCH v2 13/73] ac, radeonsi: move some VS input descriptions to ac_shader_abi

Wed Jul 5 10:47:57 UTC 2017

From: Nicolai Hähnle <nicolai.haehnle at amd.com>

v2: use LLVM values instead of function parameter indices

Reviewed-by: Marek Olšák <marek.olsak at amd.com> (v1)
---
 src/amd/common/ac_shader_abi.h                    | 40 +++++++++++++++
 src/gallium/drivers/radeonsi/si_shader.c          | 60 +++++++++++++----------
 src/gallium/drivers/radeonsi/si_shader_internal.h |  8 ++-
 3 files changed, 77 insertions(+), 31 deletions(-)
 create mode 100644 src/amd/common/ac_shader_abi.h

diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
new file mode 100644
index 0000000..e10550b
--- /dev/null
+++ b/src/amd/common/ac_shader_abi.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef AC_SHADER_ABI_H
+#define AC_SHADER_ABI_H
+
+#include <llvm-c/Core.h>
+
+/* Document the shader ABI during compilation. This is what allows radeonsi and
+ * radv to share a compiler backend.
+ */
+struct ac_shader_abi {
+	LLVMValueRef base_vertex;
+	LLVMValueRef start_instance;
+	LLVMValueRef draw_id;
+	LLVMValueRef vertex_id;
+	LLVMValueRef instance_id;
+};
+
+#endif /* AC_SHADER_ABI_H */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 28923e4..d8bacdb 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -60,20 +60,21 @@ struct si_shader_output_values
 	unsigned semantic_index;
 	ubyte vertex_stream[4];
 };
 
 /**
  * Used to collect types and other info about arguments of the LLVM function
  * before the function is created.
  */
 struct si_function_info {
 	LLVMTypeRef types[100];
+	LLVMValueRef *assign[100];
 	unsigned num_sgpr_params;
 	unsigned num_params;
 };
 
 enum si_arg_regfile {
 	ARG_SGPR,
 	ARG_VGPR
 };
 
 static void si_init_shader_ctx(struct si_shader_context *ctx,
@@ -118,35 +119,43 @@ static bool is_merged_shader(struct si_shader *shader)
 	       shader->selector->type == PIPE_SHADER_TESS_CTRL ||
 	       shader->selector->type == PIPE_SHADER_GEOMETRY;
 }
 
 static void si_init_function_info(struct si_function_info *fninfo)
 {
 	fninfo->num_params = 0;
 	fninfo->num_sgpr_params = 0;
 }
 
-static unsigned add_arg(struct si_function_info *fninfo,
-			enum si_arg_regfile regfile, LLVMTypeRef type)
+static unsigned add_arg_assign(struct si_function_info *fninfo,
+			enum si_arg_regfile regfile, LLVMTypeRef type,
+			LLVMValueRef *assign)
 {
 	assert(regfile != ARG_SGPR || fninfo->num_sgpr_params == fninfo->num_params);
 
 	unsigned idx = fninfo->num_params++;
 	assert(idx < ARRAY_SIZE(fninfo->types));
 
 	if (regfile == ARG_SGPR)
 		fninfo->num_sgpr_params = fninfo->num_params;
 
 	fninfo->types[idx] = type;
+	fninfo->assign[idx] = assign;
 	return idx;
 }
 
+static unsigned add_arg(struct si_function_info *fninfo,
+			enum si_arg_regfile regfile, LLVMTypeRef type)
+{
+	return add_arg_assign(fninfo, regfile, type, NULL);
+}
+
 static void add_arg_checked(struct si_function_info *fninfo,
 			    enum si_arg_regfile regfile, LLVMTypeRef type,
 			    unsigned idx)
 {
 	MAYBE_UNUSED unsigned actual = add_arg(fninfo, regfile, type);
 	assert(actual == idx);
 }
 
 /**
  * Returns a unique index for a per-patch semantic name and index. The index
@@ -353,22 +362,21 @@ get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
 					 rel_patch_id, ""),
 			    "");
 }
 
 static LLVMValueRef get_instance_index_for_fetch(
 	struct si_shader_context *ctx,
 	unsigned param_start_instance, LLVMValueRef divisor)
 {
 	struct gallivm_state *gallivm = &ctx->gallivm;
 
-	LLVMValueRef result = LLVMGetParam(ctx->main_fn,
-					   ctx->param_instance_id);
+	LLVMValueRef result = ctx->abi.instance_id;
 
 	/* The division must be done before START_INSTANCE is added. */
 	if (divisor != ctx->i32_1)
 		result = LLVMBuildUDiv(gallivm->builder, result, divisor, "");
 
 	return LLVMBuildAdd(gallivm->builder, result,
 			    LLVMGetParam(ctx->main_fn, param_start_instance), "");
 }
 
 /* Bitcast <4 x float> to <2 x double>, extract the component, and convert
@@ -1463,30 +1471,27 @@ static void declare_system_value(struct si_shader_context *ctx,
 				 const struct tgsi_full_declaration *decl)
 {
 	struct lp_build_context *bld = &ctx->bld_base.base;
 	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMValueRef value = 0;
 
 	assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES);
 
 	switch (decl->Semantic.Name) {
 	case TGSI_SEMANTIC_INSTANCEID:
-		value = LLVMGetParam(ctx->main_fn,
-				     ctx->param_instance_id);
+		value = ctx->abi.instance_id;
 		break;
 
 	case TGSI_SEMANTIC_VERTEXID:
 		value = LLVMBuildAdd(gallivm->builder,
-				     LLVMGetParam(ctx->main_fn,
-						  ctx->param_vertex_id),
-				     LLVMGetParam(ctx->main_fn,
-						  ctx->param_base_vertex), "");
+				     ctx->abi.vertex_id,
+				     ctx->abi.base_vertex, "");
 		break;
 
 	case TGSI_SEMANTIC_VERTEXID_NOBASE:
 		/* Unused. Clarify the meaning in indexed vs. non-indexed
 		 * draws if this is ever used again. */
 		assert(false);
 		break;
 
 	case TGSI_SEMANTIC_BASEVERTEX:
 	{
@@ -1494,31 +1499,30 @@ static void declare_system_value(struct si_shader_context *ctx,
 		 * (for direct draws) or the CP (for indirect draws) is the
 		 * first vertex ID, but GLSL expects 0 to be returned.
 		 */
 		LLVMValueRef vs_state = LLVMGetParam(ctx->main_fn, ctx->param_vs_state_bits);
 		LLVMValueRef indexed;
 
 		indexed = LLVMBuildLShr(gallivm->builder, vs_state, ctx->i32_1, "");
 		indexed = LLVMBuildTrunc(gallivm->builder, indexed, ctx->i1, "");
 
 		value = LLVMBuildSelect(gallivm->builder, indexed,
-					LLVMGetParam(ctx->main_fn, ctx->param_base_vertex),
-					ctx->i32_0, "");
+					ctx->abi.base_vertex, ctx->i32_0, "");
 		break;
 	}
 
 	case TGSI_SEMANTIC_BASEINSTANCE:
-		value = LLVMGetParam(ctx->main_fn, ctx->param_start_instance);
+		value = ctx->abi.start_instance;
 		break;
 
 	case TGSI_SEMANTIC_DRAWID:
-		value = LLVMGetParam(ctx->main_fn, ctx->param_draw_id);
+		value = ctx->abi.draw_id;
 		break;
 
 	case TGSI_SEMANTIC_INVOCATIONID:
 		if (ctx->type == PIPE_SHADER_TESS_CTRL)
 			value = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
 		else if (ctx->type == PIPE_SHADER_GEOMETRY)
 			value = LLVMGetParam(ctx->main_fn,
 					     ctx->param_gs_instance_id);
 		else
 			assert(!"INVOCATIONID not implemented");
@@ -4006,20 +4010,25 @@ static void si_create_function(struct si_shader_context *ctx,
 		 * SGPR spilling significantly.
 		 */
 		if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
 			lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_BYVAL);
 			lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_NOALIAS);
 			ac_add_attr_dereferenceable(P, UINT64_MAX);
 		} else
 			lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG);
 	}
 
+	for (i = 0; i < fninfo->num_params; ++i) {
+		if (fninfo->assign[i])
+			*fninfo->assign[i] = LLVMGetParam(ctx->main_fn, i);
+	}
+
 	if (max_workgroup_size) {
 		si_llvm_add_attribute(ctx->main_fn, "amdgpu-max-work-group-size",
 				      max_workgroup_size);
 	}
 	LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
 					   "no-signed-zeros-fp-math",
 					   "true");
 
 	if (ctx->screen->b.debug_flags & DBG_UNSAFE_MATH) {
 		/* These were copied from some LLVM test. */
@@ -4153,38 +4162,38 @@ static void declare_default_desc_pointers(struct si_shader_context *ctx,
 	ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR,
 		si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS));
 	declare_per_stage_desc_pointers(ctx, fninfo, true);
 }
 
 static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx,
 					    struct si_function_info *fninfo)
 {
 	ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR,
 		si_const_array(ctx->v4i32, SI_NUM_VERTEX_BUFFERS));
-	ctx->param_base_vertex = add_arg(fninfo, ARG_SGPR, ctx->i32);
-	ctx->param_start_instance = add_arg(fninfo, ARG_SGPR, ctx->i32);
-	ctx->param_draw_id = add_arg(fninfo, ARG_SGPR, ctx->i32);
+	add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex);
+	add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance);
+	add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id);
 	ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32);
 }
 
 static void declare_vs_input_vgprs(struct si_shader_context *ctx,
 				   struct si_function_info *fninfo,
 				   unsigned *num_prolog_vgprs)
 {
 	struct si_shader *shader = ctx->shader;
 
-	ctx->param_vertex_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
+	add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.vertex_id);
 	if (shader->key.as_ls) {
 		ctx->param_rel_auto_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
-		ctx->param_instance_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
+		add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id);
 	} else {
-		ctx->param_instance_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
+		add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id);
 		ctx->param_vs_prim_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
 	}
 	add_arg(fninfo, ARG_VGPR, ctx->i32); /* unused */
 
 	if (!shader->is_gs_copy_shader) {
 		/* Vertex load indices. */
 		ctx->param_vertex_index0 = fninfo->num_params;
 		for (unsigned i = 0; i < shader->selector->info.num_inputs; i++)
 			add_arg(fninfo, ARG_VGPR, ctx->i32);
 		*num_prolog_vgprs += shader->selector->info.num_inputs;
@@ -5196,22 +5205,21 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
 	si_init_shader_ctx(&ctx, sscreen, tm);
 	ctx.shader = shader;
 	ctx.type = PIPE_SHADER_VERTEX;
 
 	builder = gallivm->builder;
 
 	create_function(&ctx);
 	preload_ring_buffers(&ctx);
 
 	LLVMValueRef voffset =
-		lp_build_mul_imm(uint, LLVMGetParam(ctx.main_fn,
-						    ctx.param_vertex_id), 4);
+		lp_build_mul_imm(uint, ctx.abi.vertex_id, 4);
 
 	/* Fetch the vertex stream ID.*/
 	LLVMValueRef stream_id;
 
 	if (gs_selector->so.num_outputs)
 		stream_id = unpack_param(&ctx, ctx.param_streamout_config, 24, 2);
 	else
 		stream_id = ctx.i32_0;
 
 	/* Fill in output information. */
@@ -6594,42 +6602,42 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
 	LLVMTypeRef *returns;
 	LLVMValueRef ret, func;
 	int num_returns, i;
 	unsigned first_vs_vgpr = key->vs_prolog.num_input_sgprs +
 				 key->vs_prolog.num_merged_next_stage_vgprs;
 	unsigned num_input_vgprs = key->vs_prolog.num_merged_next_stage_vgprs + 4;
 	unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs +
 				      num_input_vgprs;
 	unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0;
 
-	ctx->param_vertex_id = first_vs_vgpr;
-	ctx->param_instance_id = first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1);
-
 	si_init_function_info(&fninfo);
 
 	/* 4 preloaded VGPRs + vertex load indices as prolog outputs */
 	returns = alloca((num_all_input_regs + key->vs_prolog.last_input + 1) *
 			 sizeof(LLVMTypeRef));
 	num_returns = 0;
 
 	/* Declare input and output SGPRs. */
 	for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
 		add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		returns[num_returns++] = ctx->i32;
 	}
 
 	/* Preloaded VGPRs (outputs must be floats) */
 	for (i = 0; i < num_input_vgprs; i++) {
 		add_arg(&fninfo, ARG_VGPR, ctx->i32);
 		returns[num_returns++] = ctx->f32;
 	}
 
+	fninfo.assign[first_vs_vgpr] = &ctx->abi.vertex_id;
+	fninfo.assign[first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1)] = &ctx->abi.instance_id;
+
 	/* Vertex load indices. */
 	for (i = 0; i <= key->vs_prolog.last_input; i++)
 		returns[num_returns++] = ctx->f32;
 
 	/* Create the function. */
 	si_create_function(ctx, "vs_prolog", returns, num_returns, &fninfo, 0);
 	func = ctx->main_fn;
 
 	if (key->vs_prolog.num_merged_next_stage_vgprs &&
 	    !key->vs_prolog.is_monolithic)
@@ -6678,21 +6686,21 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
 			}
 
 			/* InstanceID / Divisor + StartInstance */
 			index = get_instance_index_for_fetch(ctx,
 							     user_sgpr_base +
 							     SI_SGPR_START_INSTANCE,
 							     divisor);
 		} else {
 			/* VertexID + BaseVertex */
 			index = LLVMBuildAdd(gallivm->builder,
-					     LLVMGetParam(func, ctx->param_vertex_id),
+					     ctx->abi.vertex_id,
 					     LLVMGetParam(func, user_sgpr_base +
 								SI_SGPR_BASE_VERTEX), "");
 		}
 
 		index = LLVMBuildBitCast(gallivm->builder, index, ctx->f32, "");
 		ret = LLVMBuildInsertValue(gallivm->builder, ret, index,
 					   fninfo.num_params + i, "");
 	}
 
 	si_llvm_build_ret(ctx, ret);
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 3556e69..90a70b1 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -21,20 +21,21 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #ifndef SI_SHADER_PRIVATE_H
 #define SI_SHADER_PRIVATE_H
 
 #include "si_shader.h"
 #include "gallivm/lp_bld_init.h"
 #include "gallivm/lp_bld_tgsi.h"
 #include "tgsi/tgsi_parse.h"
+#include "ac_shader_abi.h"
 #include "ac_llvm_util.h"
 #include "ac_llvm_build.h"
 
 #include <llvm-c/Core.h>
 #include <llvm-c/TargetMachine.h>
 
 struct pipe_debug_callback;
 struct ac_shader_binary;
 
 #define RADEON_LLVM_MAX_INPUT_SLOTS 32
@@ -59,20 +60,22 @@ struct si_shader_context {
 
 	/* For clamping the non-constant index in resource indexing: */
 	unsigned num_const_buffers;
 	unsigned num_shader_buffers;
 	unsigned num_images;
 	unsigned num_samplers;
 
 	/* Whether the prolog will be compiled separately. */
 	bool separate_prolog;
 
+	struct ac_shader_abi abi;
+
 	/** This function is responsible for initilizing the inputs array and will be
 	  * called once for each input declared in the TGSI shader.
 	  */
 	void (*load_input)(struct si_shader_context *,
 			   unsigned input_index,
 			   const struct tgsi_full_declaration *decl,
 			   LLVMValueRef out[4]);
 
 	void (*load_system_value)(struct si_shader_context *,
 				  unsigned index,
@@ -115,27 +118,22 @@ struct si_shader_context {
 
 	/* Parameter indices for LLVMGetParam. */
 	int param_rw_buffers;
 	int param_const_and_shader_buffers;
 	int param_samplers_and_images;
 	/* Common inputs for merged shaders. */
 	int param_merged_wave_info;
 	int param_merged_scratch_offset;
 	/* API VS */
 	int param_vertex_buffers;
-	int param_base_vertex;
-	int param_start_instance;
-	int param_draw_id;
-	int param_vertex_id;
 	int param_rel_auto_id;
 	int param_vs_prim_id;
-	int param_instance_id;
 	int param_vertex_index0;
 	/* VS states and layout of LS outputs / TCS inputs at the end
 	 *   [0] = clamp vertex color
 	 *   [1] = indexed
 	 *   [8:20] = stride between patches in DW = num_inputs * num_vertices * 4
 	 *            max = 32*32*4 + 32*4
 	 *   [24:31] = stride between vertices in DW = num_inputs * 4
 	 *             max = 32*4
 	 */
 	int param_vs_state_bits;
-- 
2.9.3