[Mesa-dev] [PATCH 3/4] radeonsi: use postponed KILL only when derivatives are used
Marek Olšák
maraeo at gmail.com
Sat Oct 14 00:58:17 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
---
src/gallium/drivers/radeonsi/si_shader.c | 3 +--
src/gallium/drivers/radeonsi/si_shader.h | 1 +
src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 2 +-
src/gallium/drivers/radeonsi/si_state_shaders.c | 6 ++++++
4 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 8abacac..c343048 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5839,22 +5839,21 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
if (ctx->type == PIPE_SHADER_GEOMETRY) {
int i;
for (i = 0; i < 4; i++) {
ctx->gs_next_vertex[i] =
lp_build_alloca(&ctx->gallivm,
ctx->i32, "");
}
}
- if (ctx->type == PIPE_SHADER_FRAGMENT && sel->info.uses_kill &&
- ctx->screen->b.debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL)) {
+ if (sel->force_correct_derivs_after_kill) {
ctx->postponed_kill = lp_build_alloca_undef(&ctx->gallivm, ctx->i1, "");
/* true = don't kill. */
LLVMBuildStore(ctx->ac.builder, LLVMConstInt(ctx->i1, 1, 0),
ctx->postponed_kill);
}
if (sel->tokens) {
if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
return false;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index ebe956e..78a2ee1 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -337,20 +337,21 @@ struct si_shader_selector {
struct tgsi_token *tokens;
struct nir_shader *nir;
struct pipe_stream_output_info so;
struct tgsi_shader_info info;
struct tgsi_tessctrl_info tcs_info;
/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
unsigned type;
bool vs_needs_prolog;
+ bool force_correct_derivs_after_kill;
unsigned pa_cl_vs_out_cntl;
ubyte clipdist_mask;
ubyte culldist_mask;
/* GS parameters. */
unsigned esgs_itemsize;
unsigned gs_input_verts_per_prim;
unsigned gs_output_prim;
unsigned gs_max_out_vertices;
unsigned gs_num_invocations;
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index ad7a42f..283a889 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -61,21 +61,21 @@ static void kil_emit(const struct lp_build_tgsi_action *action,
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef visible;
if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
visible = emit_data->args[0];
} else {
assert(emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL);
visible = LLVMConstInt(ctx->i1, false, 0);
}
- if (ctx->postponed_kill) {
+ if (ctx->shader->selector->force_correct_derivs_after_kill) {
LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, "");
mask = LLVMBuildAnd(builder, mask, visible, "");
LLVMBuildStore(builder, mask, ctx->postponed_kill);
return;
}
ac_build_kill_if_false(&ctx->ac, visible);
}
static void emit_icmp(const struct lp_build_tgsi_action *action,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 9340328..af7f6ec 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2045,20 +2045,26 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
sel->enabled_streamout_buffer_mask |=
(1 << sel->so.output[i].output_buffer) <<
(sel->so.output[i].stream * 4);
}
/* The prolog is a no-op if there are no inputs. */
sel->vs_needs_prolog = sel->type == PIPE_SHADER_VERTEX &&
sel->info.num_inputs &&
!sel->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS];
+ sel->force_correct_derivs_after_kill =
+ sel->type == PIPE_SHADER_FRAGMENT &&
+ sel->info.uses_derivatives &&
+ sel->info.uses_kill &&
+ sctx->screen->b.debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL);
+
/* Set which opcode uses which (i,j) pair. */
if (sel->info.uses_persp_opcode_interp_centroid)
sel->info.uses_persp_centroid = true;
if (sel->info.uses_linear_opcode_interp_centroid)
sel->info.uses_linear_centroid = true;
if (sel->info.uses_persp_opcode_interp_offset ||
sel->info.uses_persp_opcode_interp_sample)
sel->info.uses_persp_center = true;
--
2.7.4
More information about the mesa-dev mailing list