[Mesa-dev] [PATCH 01/19] radeonsi: clean up passing the is_monolithic flag for compilation
Marek Olšák
maraeo at gmail.com
Fri Jun 22 22:31:52 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
---
src/gallium/drivers/radeonsi/si_shader.c | 30 +++++++++----------
src/gallium/drivers/radeonsi/si_shader.h | 1 -
.../drivers/radeonsi/si_shader_internal.h | 3 --
.../drivers/radeonsi/si_state_shaders.c | 7 +++--
4 files changed, 18 insertions(+), 23 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index e7e2a12a7b0..677853af60b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5047,22 +5047,21 @@ static void create_function(struct si_shader_context *ctx)
break;
default:
assert(0 && "unimplemented shader");
return;
}
si_create_function(ctx, "main", returns, num_returns, &fninfo,
si_get_max_workgroup_size(shader));
/* Reserve register locations for VGPR inputs the PS prolog may need. */
- if (ctx->type == PIPE_SHADER_FRAGMENT &&
- ctx->separate_prolog) {
+ if (ctx->type == PIPE_SHADER_FRAGMENT && !ctx->shader->is_monolithic) {
ac_llvm_add_target_dep_function_attr(ctx->main_fn,
"InitialPSInputAddr",
S_0286D0_PERSP_SAMPLE_ENA(1) |
S_0286D0_PERSP_CENTER_ENA(1) |
S_0286D0_PERSP_CENTROID_ENA(1) |
S_0286D0_LINEAR_SAMPLE_ENA(1) |
S_0286D0_LINEAR_CENTER_ENA(1) |
S_0286D0_LINEAR_CENTROID_ENA(1) |
S_0286D0_FRONT_FACE_ENA(1) |
S_0286D0_ANCILLARY_ENA(1) |
@@ -6049,22 +6048,21 @@ static void si_init_exec_from_input(struct si_shader_context *ctx,
}
static bool si_vs_needs_prolog(const struct si_shader_selector *sel,
const struct si_vs_prolog_bits *key)
{
/* VGPR initialization fixup for Vega10 and Raven is always done in the
* VS prolog. */
return sel->vs_needs_prolog || key->ls_vgpr_fix;
}
-static bool si_compile_tgsi_main(struct si_shader_context *ctx,
- bool is_monolithic)
+static bool si_compile_tgsi_main(struct si_shader_context *ctx)
{
struct si_shader *shader = ctx->shader;
struct si_shader_selector *sel = shader->selector;
struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
// TODO clean all this up!
switch (ctx->type) {
case PIPE_SHADER_VERTEX:
ctx->load_input = declare_input_vs;
if (shader->key.as_ls)
@@ -6135,31 +6133,31 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
* - Add a barrier before the second shader.
* - In the second shader, reset EXEC to ~0 and wrap the main part in
* an if-statement. This is required for correctness in geometry
* shaders, to ensure that empty GS waves do not send GS_EMIT and
* GS_CUT messages.
*
* For monolithic merged shaders, the first shader is wrapped in an
* if-block together with its prolog in si_build_wrapper_function.
*/
if (ctx->screen->info.chip_class >= GFX9) {
- if (!is_monolithic &&
+ if (!shader->is_monolithic &&
sel->info.num_instructions > 1 && /* not empty shader */
(shader->key.as_es || shader->key.as_ls) &&
(ctx->type == PIPE_SHADER_TESS_EVAL ||
(ctx->type == PIPE_SHADER_VERTEX &&
!si_vs_needs_prolog(sel, &shader->key.part.vs.prolog)))) {
si_init_exec_from_input(ctx,
ctx->param_merged_wave_info, 0);
} else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
ctx->type == PIPE_SHADER_GEOMETRY) {
- if (!is_monolithic)
+ if (!shader->is_monolithic)
ac_init_exec_full_mask(&ctx->ac);
LLVMValueRef num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8);
LLVMValueRef ena =
LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
ac_get_thread_id(&ctx->ac), num_threads, "");
lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, ena);
/* The barrier must execute for all shaders in a
* threadgroup.
@@ -6766,71 +6764,69 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
}
}
}
LLVMBuildRetVoid(builder);
}
int si_compile_tgsi_shader(struct si_screen *sscreen,
struct si_compiler *compiler,
struct si_shader *shader,
- bool is_monolithic,
struct pipe_debug_callback *debug)
{
struct si_shader_selector *sel = shader->selector;
struct si_shader_context ctx;
int r = -1;
/* Dump TGSI code before doing TGSI->LLVM conversion in case the
* conversion fails. */
if (si_can_dump_shader(sscreen, sel->info.processor) &&
!(sscreen->debug_flags & DBG(NO_TGSI))) {
if (sel->tokens)
tgsi_dump(sel->tokens, 0);
else
nir_print_shader(sel->nir, stderr);
si_dump_streamout(&sel->so);
}
si_init_shader_ctx(&ctx, sscreen, compiler);
si_llvm_context_set_tgsi(&ctx, shader);
- ctx.separate_prolog = !is_monolithic;
memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
sizeof(shader->info.vs_output_param_offset));
shader->info.uses_instanceid = sel->info.uses_instanceid;
- if (!si_compile_tgsi_main(&ctx, is_monolithic)) {
+ if (!si_compile_tgsi_main(&ctx)) {
si_llvm_dispose(&ctx);
return -1;
}
- if (is_monolithic && ctx.type == PIPE_SHADER_VERTEX) {
+ if (shader->is_monolithic && ctx.type == PIPE_SHADER_VERTEX) {
LLVMValueRef parts[2];
bool need_prolog = sel->vs_needs_prolog;
parts[1] = ctx.main_fn;
if (need_prolog) {
union si_shader_part_key prolog_key;
si_get_vs_prolog_key(&sel->info,
shader->info.num_input_sgprs,
&shader->key.part.vs.prolog,
shader, &prolog_key);
si_build_vs_prolog_function(&ctx, &prolog_key);
parts[0] = ctx.main_fn;
}
si_build_wrapper_function(&ctx, parts + !need_prolog,
1 + need_prolog, need_prolog, 0);
- } else if (is_monolithic && ctx.type == PIPE_SHADER_TESS_CTRL) {
+ } else if (shader->is_monolithic && ctx.type == PIPE_SHADER_TESS_CTRL) {
if (sscreen->info.chip_class >= GFX9) {
struct si_shader_selector *ls = shader->key.part.tcs.ls;
LLVMValueRef parts[4];
bool vs_needs_prolog =
si_vs_needs_prolog(ls, &shader->key.part.tcs.ls_prolog);
/* TCS main part */
parts[2] = ctx.main_fn;
/* TCS epilog */
@@ -6839,23 +6835,24 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
tcs_epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog;
si_build_tcs_epilog_function(&ctx, &tcs_epilog_key);
parts[3] = ctx.main_fn;
/* VS as LS main part */
struct si_shader shader_ls = {};
shader_ls.selector = ls;
shader_ls.key.as_ls = 1;
shader_ls.key.mono = shader->key.mono;
shader_ls.key.opt = shader->key.opt;
+ shader_ls.is_monolithic = true;
si_llvm_context_set_tgsi(&ctx, &shader_ls);
- if (!si_compile_tgsi_main(&ctx, true)) {
+ if (!si_compile_tgsi_main(&ctx)) {
si_llvm_dispose(&ctx);
return -1;
}
shader->info.uses_instanceid |= ls->info.uses_instanceid;
parts[1] = ctx.main_fn;
/* LS prolog */
if (vs_needs_prolog) {
union si_shader_part_key vs_prolog_key;
si_get_vs_prolog_key(&ls->info,
@@ -6881,21 +6878,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
parts[0] = ctx.main_fn;
memset(&epilog_key, 0, sizeof(epilog_key));
epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog;
si_build_tcs_epilog_function(&ctx, &epilog_key);
parts[1] = ctx.main_fn;
si_build_wrapper_function(&ctx, parts, 2, 0, 0);
}
- } else if (is_monolithic && ctx.type == PIPE_SHADER_GEOMETRY) {
+ } else if (shader->is_monolithic && ctx.type == PIPE_SHADER_GEOMETRY) {
if (ctx.screen->info.chip_class >= GFX9) {
struct si_shader_selector *es = shader->key.part.gs.es;
LLVMValueRef es_prolog = NULL;
LLVMValueRef es_main = NULL;
LLVMValueRef gs_prolog = NULL;
LLVMValueRef gs_main = ctx.main_fn;
/* GS prolog */
union si_shader_part_key gs_prolog_key;
memset(&gs_prolog_key, 0, sizeof(gs_prolog_key));
@@ -6903,23 +6900,24 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
gs_prolog_key.gs_prolog.is_monolithic = true;
si_build_gs_prolog_function(&ctx, &gs_prolog_key);
gs_prolog = ctx.main_fn;
/* ES main part */
struct si_shader shader_es = {};
shader_es.selector = es;
shader_es.key.as_es = 1;
shader_es.key.mono = shader->key.mono;
shader_es.key.opt = shader->key.opt;
+ shader_es.is_monolithic = true;
si_llvm_context_set_tgsi(&ctx, &shader_es);
- if (!si_compile_tgsi_main(&ctx, true)) {
+ if (!si_compile_tgsi_main(&ctx)) {
si_llvm_dispose(&ctx);
return -1;
}
shader->info.uses_instanceid |= es->info.uses_instanceid;
es_main = ctx.main_fn;
/* ES prolog */
if (es->vs_needs_prolog) {
union si_shader_part_key vs_prolog_key;
si_get_vs_prolog_key(&es->info,
@@ -6954,21 +6952,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
parts[1] = ctx.main_fn;
memset(&prolog_key, 0, sizeof(prolog_key));
prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
si_build_gs_prolog_function(&ctx, &prolog_key);
parts[0] = ctx.main_fn;
si_build_wrapper_function(&ctx, parts, 2, 1, 0);
}
- } else if (is_monolithic && ctx.type == PIPE_SHADER_FRAGMENT) {
+ } else if (shader->is_monolithic && ctx.type == PIPE_SHADER_FRAGMENT) {
LLVMValueRef parts[3];
union si_shader_part_key prolog_key;
union si_shader_part_key epilog_key;
bool need_prolog;
si_get_ps_prolog_key(shader, &prolog_key, false);
need_prolog = si_need_ps_prolog(&prolog_key);
parts[need_prolog ? 1 : 0] = ctx.main_fn;
@@ -8062,21 +8060,21 @@ int si_shader_create(struct si_screen *sscreen, struct si_compiler *compiler,
/* LS, ES, VS are compiled on demand if the main part hasn't been
* compiled for that stage.
*
* Vertex shaders are compiled on demand when a vertex fetch
* workaround must be applied.
*/
if (shader->is_monolithic) {
/* Monolithic shader (compiled as a whole, has many variants,
* may take a long time to compile).
*/
- r = si_compile_tgsi_shader(sscreen, compiler, shader, true, debug);
+ r = si_compile_tgsi_shader(sscreen, compiler, shader, debug);
if (r)
return r;
} else {
/* The shader consists of several parts:
*
* - the middle part is the user shader, it has 1 variant only
* and it was compiled during the creation of the shader
* selector
* - the prolog part is inserted at the beginning
* - the epilog part is inserted at the end
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index e1f6b392fbe..fd2f71bed74 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -655,21 +655,20 @@ struct si_shader_part {
/* si_shader.c */
struct si_shader *
si_generate_gs_copy_shader(struct si_screen *sscreen,
struct si_compiler *compiler,
struct si_shader_selector *gs_selector,
struct pipe_debug_callback *debug);
int si_compile_tgsi_shader(struct si_screen *sscreen,
struct si_compiler *compiler,
struct si_shader *shader,
- bool is_monolithic,
struct pipe_debug_callback *debug);
int si_shader_create(struct si_screen *sscreen, struct si_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug);
void si_shader_destroy(struct si_shader *shader);
unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index);
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index,
unsigned is_varying);
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 0a347172d62..e528a56023f 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -55,23 +55,20 @@ struct si_shader_context {
struct si_screen *screen;
unsigned type; /* PIPE_SHADER_* specifies the type of shader. */
/* For clamping the non-constant index in resource indexing: */
unsigned num_const_buffers;
unsigned num_shader_buffers;
unsigned num_images;
unsigned num_samplers;
- /* Whether the prolog will be compiled separately. */
- bool separate_prolog;
-
struct ac_shader_abi abi;
/** This function is responsible for initilizing the inputs array and will be
* called once for each input declared in the TGSI shader.
*/
void (*load_input)(struct si_shader_context *,
unsigned input_index,
const struct tgsi_full_declaration *decl,
LLVMValueRef out[4]);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index f0498520ae8..ddd38dabbe6 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1573,24 +1573,24 @@ static bool si_check_missing_main_part(struct si_screen *sscreen,
return false;
/* We can leave the fence as permanently signaled because the
* main part becomes visible globally only after it has been
* compiled. */
util_queue_fence_init(&main_part->ready);
main_part->selector = sel;
main_part->key.as_es = key->as_es;
main_part->key.as_ls = key->as_ls;
+ main_part->is_monolithic = false;
if (si_compile_tgsi_shader(sscreen, compiler_state->compiler,
- main_part, false,
- &compiler_state->debug) != 0) {
+ main_part, &compiler_state->debug) != 0) {
FREE(main_part);
return false;
}
*mainp = main_part;
}
return true;
}
/* Select the hw shader variant depending on the current state. */
static int si_shader_select_with_key(struct si_screen *sscreen,
@@ -1875,39 +1875,40 @@ static void si_init_shader_selector_async(void *job, int thread_index)
if (!shader) {
fprintf(stderr, "radeonsi: can't allocate a main shader part\n");
return;
}
/* We can leave the fence signaled because use of the default
* main part is guarded by the selector's ready fence. */
util_queue_fence_init(&shader->ready);
shader->selector = sel;
+ shader->is_monolithic = false;
si_parse_next_shader_property(&sel->info,
sel->so.num_outputs != 0,
&shader->key);
if (sel->tokens || sel->nir)
ir_binary = si_get_ir_binary(sel);
/* Try to load the shader from the shader cache. */
mtx_lock(&sscreen->shader_cache_mutex);
if (ir_binary &&
si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
mtx_unlock(&sscreen->shader_cache_mutex);
si_shader_dump_stats_for_shader_db(shader, debug);
} else {
mtx_unlock(&sscreen->shader_cache_mutex);
/* Compile the shader if it hasn't been loaded from the cache. */
- if (si_compile_tgsi_shader(sscreen, compiler, shader, false,
+ if (si_compile_tgsi_shader(sscreen, compiler, shader,
debug) != 0) {
FREE(shader);
FREE(ir_binary);
fprintf(stderr, "radeonsi: can't compile a main shader part\n");
return;
}
if (ir_binary) {
mtx_lock(&sscreen->shader_cache_mutex);
if (!si_shader_cache_insert_shader(sscreen, ir_binary, shader, true))
--
2.17.1
More information about the mesa-dev
mailing list