[Mesa-dev] [PATCH 1/4] ac: replace ac_build_kill with ac_build_kill_if_false

Sat Oct 14 00:58:15 UTC 2017

From: Marek Olšák <marek.olsak at amd.com>

This will be a new LLVM intrinsic and will also work nicely with
llvm.amdgcn.wqm.vote.
---
 src/amd/common/ac_llvm_build.c                    | 19 +++------
 src/amd/common/ac_llvm_build.h                    |  2 +-
 src/amd/common/ac_nir_to_llvm.c                   | 16 ++------
 src/gallium/drivers/radeonsi/si_shader.c          | 48 +++++++++++------------
 src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 42 ++++++++------------
 5 files changed, 49 insertions(+), 78 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 949f181..752c42e 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1398,34 +1398,27 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
 					   v2f16, args, 2,
 					   AC_FUNC_ATTR_READNONE);
 		return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
 	}
 
 	return ac_build_intrinsic(ctx, "llvm.SI.packf16", ctx->i32, args, 2,
 				  AC_FUNC_ATTR_READNONE |
 				  AC_FUNC_ATTR_LEGACY);
 }
 
-/**
- * KILL, AKA discard in GLSL.
- *
- * \param value  kill if value < 0.0 or value == NULL.
- */
-void ac_build_kill(struct ac_llvm_context *ctx, LLVMValueRef value)
+void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
 {
-	if (value) {
-		ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt,
-				   &value, 1, AC_FUNC_ATTR_LEGACY);
-	} else {
-		ac_build_intrinsic(ctx, "llvm.AMDGPU.kilp", ctx->voidt,
-				   NULL, 0, AC_FUNC_ATTR_LEGACY);
-	}
+	LLVMValueRef value = LLVMBuildSelect(ctx->builder, i1,
+					     LLVMConstReal(ctx->f32, 1),
+					     LLVMConstReal(ctx->f32, -1), "");
+	ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt,
+			   &value, 1, AC_FUNC_ATTR_LEGACY);
 }
 
 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
 			  LLVMValueRef offset, LLVMValueRef width,
 			  bool is_signed)
 {
 	LLVMValueRef args[] = {
 		input,
 		offset,
 		width,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index f0b5875..b721782 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -258,21 +258,21 @@ struct ac_image_args {
 	LLVMValueRef addr;
 	unsigned dmask;
 	bool unorm;
 	bool da;
 };
 
 LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 				   struct ac_image_args *a);
 LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
 				    LLVMValueRef args[2]);
-void ac_build_kill(struct ac_llvm_context *ctx, LLVMValueRef value);
+void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
 			  LLVMValueRef offset, LLVMValueRef width,
 			  bool is_signed);
 
 void ac_get_image_intr_name(const char *base_name,
 			    LLVMTypeRef data_type,
 			    LLVMTypeRef coords_type,
 			    LLVMTypeRef rsrc_type,
 			    char *out_name, unsigned out_len);
 
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 3ba3ebf..c1490a5 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3614,28 +3614,24 @@ static void emit_barrier(struct nir_to_llvm_context *ctx)
 	}
 	ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier",
 			   ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
 }
 
 static void emit_discard_if(struct ac_nir_context *ctx,
 			    const nir_intrinsic_instr *instr)
 {
 	LLVMValueRef cond;
 
-	cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
+	cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
 			     get_src(ctx, instr->src[0]),
 			     ctx->ac.i32_0, "");
-
-	cond = LLVMBuildSelect(ctx->ac.builder, cond,
-			       LLVMConstReal(ctx->ac.f32, -1.0f),
-			       ctx->ac.f32_0, "");
-	ac_build_kill(&ctx->ac, cond);
+	ac_build_kill_if_false(&ctx->ac, cond);
 }
 
 static LLVMValueRef
 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
 {
 	LLVMValueRef result;
 	LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
 	result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
 			      LLVMConstInt(ctx->i32, 0xfc0, false), "");
 
@@ -3856,41 +3852,37 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
 		}
 	}
 	return ac_build_gather_values(&ctx->ac, result, 2);
 }
 
 static void
 visit_emit_vertex(struct nir_to_llvm_context *ctx,
 		  const nir_intrinsic_instr *instr)
 {
 	LLVMValueRef gs_next_vertex;
-	LLVMValueRef can_emit, kill;
+	LLVMValueRef can_emit;
 	int idx;
 
 	assert(instr->const_index[0] == 0);
 	/* Write vertex attribute values to GSVS ring */
 	gs_next_vertex = LLVMBuildLoad(ctx->builder,
 				       ctx->gs_next_vertex,
 				       "");
 
 	/* If this thread has already emitted the declared maximum number of
 	 * vertices, kill it: excessive vertex emissions are not supposed to
 	 * have any effect, and GS threads have no externally observable
 	 * effects other than emitting vertices.
 	 */
 	can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
 				 LLVMConstInt(ctx->i32, ctx->gs_max_out_vertices, false), "");
-
-	kill = LLVMBuildSelect(ctx->builder, can_emit,
-			       LLVMConstReal(ctx->f32, 1.0f),
-			       LLVMConstReal(ctx->f32, -1.0f), "");
-	ac_build_kill(&ctx->ac, kill);
+	ac_build_kill_if_false(&ctx->ac, can_emit);
 
 	/* loop num outputs */
 	idx = 0;
 	for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
 		LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
 		int length = 4;
 		int slot = idx;
 		int slot_inc = 1;
 
 		if (!(ctx->output_mask & (1ull << i)))
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c3fe13d..8abacac 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2265,36 +2265,38 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
 		break;
 	}
 }
 
 static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
 			  LLVMValueRef alpha)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 
 	if (ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_NEVER) {
+		static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] = {
+			[PIPE_FUNC_LESS] = LLVMRealOLT,
+			[PIPE_FUNC_EQUAL] = LLVMRealOEQ,
+			[PIPE_FUNC_LEQUAL] = LLVMRealOLE,
+			[PIPE_FUNC_GREATER] = LLVMRealOGT,
+			[PIPE_FUNC_NOTEQUAL] = LLVMRealONE,
+			[PIPE_FUNC_GEQUAL] = LLVMRealOGE,
+		};
+		LLVMRealPredicate cond = cond_map[ctx->shader->key.part.ps.epilog.alpha_func];
+		assert(cond);
+
 		LLVMValueRef alpha_ref = LLVMGetParam(ctx->main_fn,
 				SI_PARAM_ALPHA_REF);
-
 		LLVMValueRef alpha_pass =
-			lp_build_cmp(&bld_base->base,
-				     ctx->shader->key.part.ps.epilog.alpha_func,
-				     alpha, alpha_ref);
-		LLVMValueRef arg =
-			lp_build_select(&bld_base->base,
-					alpha_pass,
-					LLVMConstReal(ctx->f32, 1.0f),
-					LLVMConstReal(ctx->f32, -1.0f));
-
-		ac_build_kill(&ctx->ac, arg);
+			LLVMBuildFCmp(ctx->ac.builder, cond, alpha, alpha_ref, "");
+		ac_build_kill_if_false(&ctx->ac, alpha_pass);
 	} else {
-		ac_build_kill(&ctx->ac, NULL);
+		ac_build_kill_if_false(&ctx->ac, LLVMConstInt(ctx->i1, 0, 0));
 	}
 }
 
 static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
 						  LLVMValueRef alpha,
 						  unsigned samplemask_param)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMValueRef coverage;
 
@@ -3566,21 +3568,21 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi,
 	struct si_shader *shader = ctx->shader;
 	struct tgsi_shader_info *info = &shader->selector->info;
 	LLVMBuilderRef builder = ctx->ac.builder;
 	unsigned i, j, first_vgpr, vgpr;
 
 	LLVMValueRef color[8][4] = {};
 	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
 	LLVMValueRef ret;
 
 	if (ctx->postponed_kill)
-		ac_build_kill(&ctx->ac, LLVMBuildLoad(builder, ctx->postponed_kill, ""));
+		ac_build_kill_if_false(&ctx->ac, LLVMBuildLoad(builder, ctx->postponed_kill, ""));
 
 	/* Read the output values. */
 	for (i = 0; i < info->num_outputs; i++) {
 		unsigned semantic_name = info->output_semantic_name[i];
 		unsigned semantic_index = info->output_semantic_index[i];
 
 		switch (semantic_name) {
 		case TGSI_SEMANTIC_COLOR:
 			assert(semantic_index < 8);
 			for (j = 0; j < 4; j++) {
@@ -4049,21 +4051,21 @@ static void si_llvm_emit_vertex(
 	struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct lp_build_context *uint = &bld_base->uint_bld;
 	struct si_shader *shader = ctx->shader;
 	struct tgsi_shader_info *info = &shader->selector->info;
 	struct lp_build_if_state if_state;
 	LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
 					    ctx->param_gs2vs_offset);
 	LLVMValueRef gs_next_vertex;
-	LLVMValueRef can_emit, kill;
+	LLVMValueRef can_emit;
 	unsigned chan, offset;
 	int i;
 	unsigned stream;
 
 	stream = si_llvm_get_stream(bld_base, emit_data);
 
 	/* Write vertex attribute values to GSVS ring */
 	gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
 				       ctx->gs_next_vertex[stream],
 				       "");
@@ -4075,25 +4077,21 @@ static void si_llvm_emit_vertex(
 	 * If the shader has no writes to memory, kill it instead. This skips
 	 * further memory loads and may allow LLVM to skip to the end
 	 * altogether.
 	 */
 	can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex,
 				 LLVMConstInt(ctx->i32,
 					      shader->selector->gs_max_out_vertices, 0), "");
 
 	bool use_kill = !info->writes_memory;
 	if (use_kill) {
-		kill = lp_build_select(&bld_base->base, can_emit,
-				       LLVMConstReal(ctx->f32, 1.0f),
-				       LLVMConstReal(ctx->f32, -1.0f));
-
-		ac_build_kill(&ctx->ac, kill);
+		ac_build_kill_if_false(&ctx->ac, can_emit);
 	} else {
 		lp_build_if(&if_state, &ctx->gallivm, can_emit);
 	}
 
 	offset = 0;
 	for (i = 0; i < info->num_outputs; i++) {
 		LLVMValueRef *out_ptr = ctx->outputs[i];
 
 		for (chan = 0; chan < 4; chan++) {
 			if (!(info->output_usagemask[i] & (1 << chan)) ||
@@ -4874,25 +4872,21 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
 	slot = LLVMConstInt(ctx->i32, SI_PS_CONST_POLY_STIPPLE, 0);
 	desc = ac_build_load_to_sgpr(&ctx->ac, param_rw_buffers, slot);
 
 	/* The stipple pattern is 32x32, each row has 32 bits. */
 	offset = LLVMBuildMul(builder, address[1],
 			      LLVMConstInt(ctx->i32, 4, 0), "");
 	row = buffer_load_const(ctx, desc, offset);
 	row = ac_to_integer(&ctx->ac, row);
 	bit = LLVMBuildLShr(builder, row, address[0], "");
 	bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");
-
-	/* The intrinsic kills the thread if arg < 0. */
-	bit = LLVMBuildSelect(builder, bit, LLVMConstReal(ctx->f32, 0),
-			      LLVMConstReal(ctx->f32, -1), "");
-	ac_build_kill(&ctx->ac, bit);
+	ac_build_kill_if_false(&ctx->ac, bit);
 }
 
 void si_shader_binary_read_config(struct ac_shader_binary *binary,
 				  struct si_shader_config *conf,
 				  unsigned symbol_offset)
 {
 	unsigned i;
 	const unsigned char *config =
 		ac_shader_binary_config_start(binary, symbol_offset);
 	bool really_needs_scratch = false;
@@ -5847,22 +5841,24 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 		int i;
 		for (i = 0; i < 4; i++) {
 			ctx->gs_next_vertex[i] =
 				lp_build_alloca(&ctx->gallivm,
 						ctx->i32, "");
 		}
 	}
 
 	if (ctx->type == PIPE_SHADER_FRAGMENT && sel->info.uses_kill &&
 	    ctx->screen->b.debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL)) {
-		/* This is initialized to 0.0 = not kill. */
-		ctx->postponed_kill = lp_build_alloca(&ctx->gallivm, ctx->f32, "");
+		ctx->postponed_kill = lp_build_alloca_undef(&ctx->gallivm, ctx->i1, "");
+		/* true = don't kill. */
+		LLVMBuildStore(ctx->ac.builder, LLVMConstInt(ctx->i1, 1, 0),
+			       ctx->postponed_kill);
 	}
 
 	if (sel->tokens) {
 		if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
 			fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
 			return false;
 		}
 	} else {
 		if (!si_nir_build_llvm(ctx, sel->nir)) {
 			fprintf(stderr, "Failed to translate shader from NIR to LLVM\n");
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index fc705c3..ad7a42f 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -32,67 +32,57 @@ static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
 			       struct lp_build_emit_data *emit_data)
 {
 	const struct tgsi_full_instruction *inst = emit_data->inst;
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMBuilderRef builder = ctx->ac.builder;
 	unsigned i;
 	LLVMValueRef conds[TGSI_NUM_CHANNELS];
 
 	for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
 		LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
-		conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
+		conds[i] = LLVMBuildFCmp(builder, LLVMRealOGE, value,
 					ctx->ac.f32_0, "");
 	}
 
-	/* Or the conditions together */
+	/* And the conditions together */
 	for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
-		conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
+		conds[i - 1] = LLVMBuildAnd(builder, conds[i], conds[i - 1], "");
 	}
 
 	emit_data->dst_type = ctx->voidt;
 	emit_data->arg_count = 1;
-	emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
-					LLVMConstReal(ctx->f32, -1.0f),
-					ctx->ac.f32_0, "");
+	emit_data->args[0] = conds[0];
 }
 
 static void kil_emit(const struct lp_build_tgsi_action *action,
 		     struct lp_build_tgsi_context *bld_base,
 		     struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMBuilderRef builder = ctx->ac.builder;
+	LLVMValueRef visible;
+
+	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
+		visible = emit_data->args[0];
+	} else {
+		assert(emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL);
+		visible = LLVMConstInt(ctx->i1, false, 0);
+	}
 
 	if (ctx->postponed_kill) {
-		if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
-			LLVMValueRef val;
-
-			/* Take the minimum kill value. This is the same as OR
-			 * between 2 kill values. If the value is negative,
-			 * the pixel will be killed.
-			 */
-			val = LLVMBuildLoad(builder, ctx->postponed_kill, "");
-			val = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MIN,
-							val, emit_data->args[0]);
-			LLVMBuildStore(builder, val, ctx->postponed_kill);
-		} else {
-			LLVMBuildStore(builder,
-				       LLVMConstReal(ctx->f32, -1),
-				       ctx->postponed_kill);
-		}
+		LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, "");
+		mask = LLVMBuildAnd(builder, mask, visible, "");
+		LLVMBuildStore(builder, mask, ctx->postponed_kill);
 		return;
 	}
 
-	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF)
-		ac_build_kill(&ctx->ac, emit_data->args[0]);
-	else
-		ac_build_kill(&ctx->ac, NULL);
+	ac_build_kill_if_false(&ctx->ac, visible);
 }
 
 static void emit_icmp(const struct lp_build_tgsi_action *action,
 		      struct lp_build_tgsi_context *bld_base,
 		      struct lp_build_emit_data *emit_data)
 {
 	unsigned pred;
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 
 	switch (emit_data->inst->Instruction.Opcode) {
-- 
2.7.4