[Mesa-dev] [PATCH v2 13/73] ac, radeonsi: move some VS input descriptions to ac_shader_abi
Nicolai Hähnle
nhaehnle at gmail.com
Wed Jul 5 10:47:57 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
v2: use LLVM values instead of function parameter indices
Reviewed-by: Marek Olšák <marek.olsak at amd.com> (v1)
---
src/amd/common/ac_shader_abi.h | 40 +++++++++++++++
src/gallium/drivers/radeonsi/si_shader.c | 60 +++++++++++++----------
src/gallium/drivers/radeonsi/si_shader_internal.h | 8 ++-
3 files changed, 77 insertions(+), 31 deletions(-)
create mode 100644 src/amd/common/ac_shader_abi.h
diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
new file mode 100644
index 0000000..e10550b
--- /dev/null
+++ b/src/amd/common/ac_shader_abi.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef AC_SHADER_ABI_H
+#define AC_SHADER_ABI_H
+
+#include <llvm-c/Core.h>
+
+/* Document the shader ABI during compilation. This is what allows radeonsi and
+ * radv to share a compiler backend.
+ */
+struct ac_shader_abi {
+ LLVMValueRef base_vertex;
+ LLVMValueRef start_instance;
+ LLVMValueRef draw_id;
+ LLVMValueRef vertex_id;
+ LLVMValueRef instance_id;
+};
+
+#endif /* AC_SHADER_ABI_H */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 28923e4..d8bacdb 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -60,20 +60,21 @@ struct si_shader_output_values
unsigned semantic_index;
ubyte vertex_stream[4];
};
/**
* Used to collect types and other info about arguments of the LLVM function
* before the function is created.
*/
struct si_function_info {
LLVMTypeRef types[100];
+ LLVMValueRef *assign[100];
unsigned num_sgpr_params;
unsigned num_params;
};
enum si_arg_regfile {
ARG_SGPR,
ARG_VGPR
};
static void si_init_shader_ctx(struct si_shader_context *ctx,
@@ -118,35 +119,43 @@ static bool is_merged_shader(struct si_shader *shader)
shader->selector->type == PIPE_SHADER_TESS_CTRL ||
shader->selector->type == PIPE_SHADER_GEOMETRY;
}
static void si_init_function_info(struct si_function_info *fninfo)
{
fninfo->num_params = 0;
fninfo->num_sgpr_params = 0;
}
-static unsigned add_arg(struct si_function_info *fninfo,
- enum si_arg_regfile regfile, LLVMTypeRef type)
+static unsigned add_arg_assign(struct si_function_info *fninfo,
+ enum si_arg_regfile regfile, LLVMTypeRef type,
+ LLVMValueRef *assign)
{
assert(regfile != ARG_SGPR || fninfo->num_sgpr_params == fninfo->num_params);
unsigned idx = fninfo->num_params++;
assert(idx < ARRAY_SIZE(fninfo->types));
if (regfile == ARG_SGPR)
fninfo->num_sgpr_params = fninfo->num_params;
fninfo->types[idx] = type;
+ fninfo->assign[idx] = assign;
return idx;
}
+static unsigned add_arg(struct si_function_info *fninfo,
+ enum si_arg_regfile regfile, LLVMTypeRef type)
+{
+ return add_arg_assign(fninfo, regfile, type, NULL);
+}
+
static void add_arg_checked(struct si_function_info *fninfo,
enum si_arg_regfile regfile, LLVMTypeRef type,
unsigned idx)
{
MAYBE_UNUSED unsigned actual = add_arg(fninfo, regfile, type);
assert(actual == idx);
}
/**
* Returns a unique index for a per-patch semantic name and index. The index
@@ -353,22 +362,21 @@ get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
rel_patch_id, ""),
"");
}
static LLVMValueRef get_instance_index_for_fetch(
struct si_shader_context *ctx,
unsigned param_start_instance, LLVMValueRef divisor)
{
struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMValueRef result = LLVMGetParam(ctx->main_fn,
- ctx->param_instance_id);
+ LLVMValueRef result = ctx->abi.instance_id;
/* The division must be done before START_INSTANCE is added. */
if (divisor != ctx->i32_1)
result = LLVMBuildUDiv(gallivm->builder, result, divisor, "");
return LLVMBuildAdd(gallivm->builder, result,
LLVMGetParam(ctx->main_fn, param_start_instance), "");
}
/* Bitcast <4 x float> to <2 x double>, extract the component, and convert
@@ -1463,30 +1471,27 @@ static void declare_system_value(struct si_shader_context *ctx,
const struct tgsi_full_declaration *decl)
{
struct lp_build_context *bld = &ctx->bld_base.base;
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef value = 0;
assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES);
switch (decl->Semantic.Name) {
case TGSI_SEMANTIC_INSTANCEID:
- value = LLVMGetParam(ctx->main_fn,
- ctx->param_instance_id);
+ value = ctx->abi.instance_id;
break;
case TGSI_SEMANTIC_VERTEXID:
value = LLVMBuildAdd(gallivm->builder,
- LLVMGetParam(ctx->main_fn,
- ctx->param_vertex_id),
- LLVMGetParam(ctx->main_fn,
- ctx->param_base_vertex), "");
+ ctx->abi.vertex_id,
+ ctx->abi.base_vertex, "");
break;
case TGSI_SEMANTIC_VERTEXID_NOBASE:
/* Unused. Clarify the meaning in indexed vs. non-indexed
* draws if this is ever used again. */
assert(false);
break;
case TGSI_SEMANTIC_BASEVERTEX:
{
@@ -1494,31 +1499,30 @@ static void declare_system_value(struct si_shader_context *ctx,
* (for direct draws) or the CP (for indirect draws) is the
* first vertex ID, but GLSL expects 0 to be returned.
*/
LLVMValueRef vs_state = LLVMGetParam(ctx->main_fn, ctx->param_vs_state_bits);
LLVMValueRef indexed;
indexed = LLVMBuildLShr(gallivm->builder, vs_state, ctx->i32_1, "");
indexed = LLVMBuildTrunc(gallivm->builder, indexed, ctx->i1, "");
value = LLVMBuildSelect(gallivm->builder, indexed,
- LLVMGetParam(ctx->main_fn, ctx->param_base_vertex),
- ctx->i32_0, "");
+ ctx->abi.base_vertex, ctx->i32_0, "");
break;
}
case TGSI_SEMANTIC_BASEINSTANCE:
- value = LLVMGetParam(ctx->main_fn, ctx->param_start_instance);
+ value = ctx->abi.start_instance;
break;
case TGSI_SEMANTIC_DRAWID:
- value = LLVMGetParam(ctx->main_fn, ctx->param_draw_id);
+ value = ctx->abi.draw_id;
break;
case TGSI_SEMANTIC_INVOCATIONID:
if (ctx->type == PIPE_SHADER_TESS_CTRL)
value = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
else if (ctx->type == PIPE_SHADER_GEOMETRY)
value = LLVMGetParam(ctx->main_fn,
ctx->param_gs_instance_id);
else
assert(!"INVOCATIONID not implemented");
@@ -4006,20 +4010,25 @@ static void si_create_function(struct si_shader_context *ctx,
* SGPR spilling significantly.
*/
if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_BYVAL);
lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_NOALIAS);
ac_add_attr_dereferenceable(P, UINT64_MAX);
} else
lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG);
}
+ for (i = 0; i < fninfo->num_params; ++i) {
+ if (fninfo->assign[i])
+ *fninfo->assign[i] = LLVMGetParam(ctx->main_fn, i);
+ }
+
if (max_workgroup_size) {
si_llvm_add_attribute(ctx->main_fn, "amdgpu-max-work-group-size",
max_workgroup_size);
}
LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
"no-signed-zeros-fp-math",
"true");
if (ctx->screen->b.debug_flags & DBG_UNSAFE_MATH) {
/* These were copied from some LLVM test. */
@@ -4153,38 +4162,38 @@ static void declare_default_desc_pointers(struct si_shader_context *ctx,
ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR,
si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS));
declare_per_stage_desc_pointers(ctx, fninfo, true);
}
static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx,
struct si_function_info *fninfo)
{
ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR,
si_const_array(ctx->v4i32, SI_NUM_VERTEX_BUFFERS));
- ctx->param_base_vertex = add_arg(fninfo, ARG_SGPR, ctx->i32);
- ctx->param_start_instance = add_arg(fninfo, ARG_SGPR, ctx->i32);
- ctx->param_draw_id = add_arg(fninfo, ARG_SGPR, ctx->i32);
+ add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex);
+ add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance);
+ add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id);
ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32);
}
static void declare_vs_input_vgprs(struct si_shader_context *ctx,
struct si_function_info *fninfo,
unsigned *num_prolog_vgprs)
{
struct si_shader *shader = ctx->shader;
- ctx->param_vertex_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
+ add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.vertex_id);
if (shader->key.as_ls) {
ctx->param_rel_auto_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
- ctx->param_instance_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
+ add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id);
} else {
- ctx->param_instance_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
+ add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id);
ctx->param_vs_prim_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
}
add_arg(fninfo, ARG_VGPR, ctx->i32); /* unused */
if (!shader->is_gs_copy_shader) {
/* Vertex load indices. */
ctx->param_vertex_index0 = fninfo->num_params;
for (unsigned i = 0; i < shader->selector->info.num_inputs; i++)
add_arg(fninfo, ARG_VGPR, ctx->i32);
*num_prolog_vgprs += shader->selector->info.num_inputs;
@@ -5196,22 +5205,21 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
si_init_shader_ctx(&ctx, sscreen, tm);
ctx.shader = shader;
ctx.type = PIPE_SHADER_VERTEX;
builder = gallivm->builder;
create_function(&ctx);
preload_ring_buffers(&ctx);
LLVMValueRef voffset =
- lp_build_mul_imm(uint, LLVMGetParam(ctx.main_fn,
- ctx.param_vertex_id), 4);
+ lp_build_mul_imm(uint, ctx.abi.vertex_id, 4);
/* Fetch the vertex stream ID.*/
LLVMValueRef stream_id;
if (gs_selector->so.num_outputs)
stream_id = unpack_param(&ctx, ctx.param_streamout_config, 24, 2);
else
stream_id = ctx.i32_0;
/* Fill in output information. */
@@ -6594,42 +6602,42 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
LLVMTypeRef *returns;
LLVMValueRef ret, func;
int num_returns, i;
unsigned first_vs_vgpr = key->vs_prolog.num_input_sgprs +
key->vs_prolog.num_merged_next_stage_vgprs;
unsigned num_input_vgprs = key->vs_prolog.num_merged_next_stage_vgprs + 4;
unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs +
num_input_vgprs;
unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0;
- ctx->param_vertex_id = first_vs_vgpr;
- ctx->param_instance_id = first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1);
-
si_init_function_info(&fninfo);
/* 4 preloaded VGPRs + vertex load indices as prolog outputs */
returns = alloca((num_all_input_regs + key->vs_prolog.last_input + 1) *
sizeof(LLVMTypeRef));
num_returns = 0;
/* Declare input and output SGPRs. */
for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
add_arg(&fninfo, ARG_SGPR, ctx->i32);
returns[num_returns++] = ctx->i32;
}
/* Preloaded VGPRs (outputs must be floats) */
for (i = 0; i < num_input_vgprs; i++) {
add_arg(&fninfo, ARG_VGPR, ctx->i32);
returns[num_returns++] = ctx->f32;
}
+ fninfo.assign[first_vs_vgpr] = &ctx->abi.vertex_id;
+ fninfo.assign[first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1)] = &ctx->abi.instance_id;
+
/* Vertex load indices. */
for (i = 0; i <= key->vs_prolog.last_input; i++)
returns[num_returns++] = ctx->f32;
/* Create the function. */
si_create_function(ctx, "vs_prolog", returns, num_returns, &fninfo, 0);
func = ctx->main_fn;
if (key->vs_prolog.num_merged_next_stage_vgprs &&
!key->vs_prolog.is_monolithic)
@@ -6678,21 +6686,21 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
}
/* InstanceID / Divisor + StartInstance */
index = get_instance_index_for_fetch(ctx,
user_sgpr_base +
SI_SGPR_START_INSTANCE,
divisor);
} else {
/* VertexID + BaseVertex */
index = LLVMBuildAdd(gallivm->builder,
- LLVMGetParam(func, ctx->param_vertex_id),
+ ctx->abi.vertex_id,
LLVMGetParam(func, user_sgpr_base +
SI_SGPR_BASE_VERTEX), "");
}
index = LLVMBuildBitCast(gallivm->builder, index, ctx->f32, "");
ret = LLVMBuildInsertValue(gallivm->builder, ret, index,
fninfo.num_params + i, "");
}
si_llvm_build_ret(ctx, ret);
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 3556e69..90a70b1 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -21,20 +21,21 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SI_SHADER_PRIVATE_H
#define SI_SHADER_PRIVATE_H
#include "si_shader.h"
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_tgsi.h"
#include "tgsi/tgsi_parse.h"
+#include "ac_shader_abi.h"
#include "ac_llvm_util.h"
#include "ac_llvm_build.h"
#include <llvm-c/Core.h>
#include <llvm-c/TargetMachine.h>
struct pipe_debug_callback;
struct ac_shader_binary;
#define RADEON_LLVM_MAX_INPUT_SLOTS 32
@@ -59,20 +60,22 @@ struct si_shader_context {
/* For clamping the non-constant index in resource indexing: */
unsigned num_const_buffers;
unsigned num_shader_buffers;
unsigned num_images;
unsigned num_samplers;
/* Whether the prolog will be compiled separately. */
bool separate_prolog;
+ struct ac_shader_abi abi;
+
/** This function is responsible for initilizing the inputs array and will be
* called once for each input declared in the TGSI shader.
*/
void (*load_input)(struct si_shader_context *,
unsigned input_index,
const struct tgsi_full_declaration *decl,
LLVMValueRef out[4]);
void (*load_system_value)(struct si_shader_context *,
unsigned index,
@@ -115,27 +118,22 @@ struct si_shader_context {
/* Parameter indices for LLVMGetParam. */
int param_rw_buffers;
int param_const_and_shader_buffers;
int param_samplers_and_images;
/* Common inputs for merged shaders. */
int param_merged_wave_info;
int param_merged_scratch_offset;
/* API VS */
int param_vertex_buffers;
- int param_base_vertex;
- int param_start_instance;
- int param_draw_id;
- int param_vertex_id;
int param_rel_auto_id;
int param_vs_prim_id;
- int param_instance_id;
int param_vertex_index0;
/* VS states and layout of LS outputs / TCS inputs at the end
* [0] = clamp vertex color
* [1] = indexed
* [8:20] = stride between patches in DW = num_inputs * num_vertices * 4
* max = 32*32*4 + 32*4
* [24:31] = stride between vertices in DW = num_inputs * 4
* max = 32*4
*/
int param_vs_state_bits;
--
2.9.3
More information about the mesa-dev
mailing list