[Mesa-dev] [PATCH 3/4] radeonsi: use postponed KILL only when derivatives are used

Marek Olšák maraeo at gmail.com
Sat Oct 14 00:58:17 UTC 2017


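From: Marek Olšák <marek.olsak at amd.com>

Add si_shader_selector::force_correct_derivs_after_kill, computed once
when the selector is created. It is set only for fragment shaders that
use both derivatives and KILL while the FS_CORRECT_DERIVS_AFTER_KILL
debug flag is enabled. si_compile_tgsi_main and kil_emit now check this
flag to decide whether to use the postponed kill.
---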
 src/gallium/drivers/radeonsi/si_shader.c          | 3 +--
 src/gallium/drivers/radeonsi/si_shader.h          | 1 +
 src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 2 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c   | 6 ++++++
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 8abacac..c343048 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5839,22 +5839,21 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 
 	if (ctx->type == PIPE_SHADER_GEOMETRY) {
 		int i;
 		for (i = 0; i < 4; i++) {
 			ctx->gs_next_vertex[i] =
 				lp_build_alloca(&ctx->gallivm,
 						ctx->i32, "");
 		}
 	}
 
-	if (ctx->type == PIPE_SHADER_FRAGMENT && sel->info.uses_kill &&
-	    ctx->screen->b.debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL)) {
+	if (sel->force_correct_derivs_after_kill) {
 		ctx->postponed_kill = lp_build_alloca_undef(&ctx->gallivm, ctx->i1, "");
 		/* true = don't kill. */
 		LLVMBuildStore(ctx->ac.builder, LLVMConstInt(ctx->i1, 1, 0),
 			       ctx->postponed_kill);
 	}
 
 	if (sel->tokens) {
 		if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
 			fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
 			return false;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index ebe956e..78a2ee1 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -337,20 +337,21 @@ struct si_shader_selector {
 
 	struct tgsi_token       *tokens;
 	struct nir_shader       *nir;
 	struct pipe_stream_output_info  so;
 	struct tgsi_shader_info		info;
 	struct tgsi_tessctrl_info	tcs_info;
 
 	/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
 	unsigned	type;
 	bool		vs_needs_prolog;
+	bool		force_correct_derivs_after_kill;
 	unsigned	pa_cl_vs_out_cntl;
 	ubyte		clipdist_mask;
 	ubyte		culldist_mask;
 
 	/* GS parameters. */
 	unsigned	esgs_itemsize;
 	unsigned	gs_input_verts_per_prim;
 	unsigned	gs_output_prim;
 	unsigned	gs_max_out_vertices;
 	unsigned	gs_num_invocations;
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index ad7a42f..283a889 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -61,21 +61,21 @@ static void kil_emit(const struct lp_build_tgsi_action *action,
 	LLVMBuilderRef builder = ctx->ac.builder;
 	LLVMValueRef visible;
 
 	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
 		visible = emit_data->args[0];
 	} else {
 		assert(emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL);
 		visible = LLVMConstInt(ctx->i1, false, 0);
 	}
 
-	if (ctx->postponed_kill) {
+	if (ctx->shader->selector->force_correct_derivs_after_kill) {
 		LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, "");
 		mask = LLVMBuildAnd(builder, mask, visible, "");
 		LLVMBuildStore(builder, mask, ctx->postponed_kill);
 		return;
 	}
 
 	ac_build_kill_if_false(&ctx->ac, visible);
 }
 
 static void emit_icmp(const struct lp_build_tgsi_action *action,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 9340328..af7f6ec 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2045,20 +2045,26 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 		sel->enabled_streamout_buffer_mask |=
 			(1 << sel->so.output[i].output_buffer) <<
 			(sel->so.output[i].stream * 4);
 	}
 
 	/* The prolog is a no-op if there are no inputs. */
 	sel->vs_needs_prolog = sel->type == PIPE_SHADER_VERTEX &&
 			       sel->info.num_inputs &&
 			       !sel->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS];
 
+	sel->force_correct_derivs_after_kill =
+		sel->type == PIPE_SHADER_FRAGMENT &&
+		sel->info.uses_derivatives &&
+		sel->info.uses_kill &&
+		sctx->screen->b.debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL);
+
 	/* Set which opcode uses which (i,j) pair. */
 	if (sel->info.uses_persp_opcode_interp_centroid)
 		sel->info.uses_persp_centroid = true;
 
 	if (sel->info.uses_linear_opcode_interp_centroid)
 		sel->info.uses_linear_centroid = true;
 
 	if (sel->info.uses_persp_opcode_interp_offset ||
 	    sel->info.uses_persp_opcode_interp_sample)
 		sel->info.uses_persp_center = true;
-- 
2.7.4



More information about the mesa-dev mailing list