[Mesa-dev] [PATCH 1/4] ac: replace ac_build_kill with ac_build_kill_if_false
Marek Olšák
maraeo at gmail.com
Sat Oct 14 00:58:15 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
The kill-if-false operation will become a new LLVM intrinsic, and taking an
i1 condition also works nicely with llvm.amdgcn.wqm.vote.
---
src/amd/common/ac_llvm_build.c | 19 +++------
src/amd/common/ac_llvm_build.h | 2 +-
src/amd/common/ac_nir_to_llvm.c | 16 ++------
src/gallium/drivers/radeonsi/si_shader.c | 48 +++++++++++------------
src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 42 ++++++++------------
5 files changed, 49 insertions(+), 78 deletions(-)
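
For illustration only (not part of the patch): a minimal sketch of how a
caller changes with the new helper, assuming an initialized
struct ac_llvm_context. The llvm.amdgcn.wqm.vote call and its i1 -> i1
signature are an assumption based on the commit message; everything else
mirrors helpers visible in the diff below.

    /* Sketch only -- not part of this patch. */
    #include "ac_llvm_build.h"

    /* "accept" is an i1 that is true when the invocation should survive. */
    static void kill_sketch(struct ac_llvm_context *ctx, LLVMValueRef accept)
    {
            /* Old interface: encode the boolean as a float and kill when it
             * is negative (or pass NULL to kill unconditionally):
             *
             *   LLVMValueRef arg = LLVMBuildSelect(ctx->builder, accept,
             *                                      LLVMConstReal(ctx->f32, 1),
             *                                      LLVMConstReal(ctx->f32, -1), "");
             *   ac_build_kill(ctx, arg);
             */

            /* New interface: the i1 is passed directly, so an i1-producing
             * helper such as llvm.amdgcn.wqm.vote (assumed i1 -> i1) can be
             * chained in front without any float round-trip. */
            LLVMTypeRef i1 = LLVMInt1TypeInContext(ctx->context);
            accept = ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", i1,
                                        &accept, 1, AC_FUNC_ATTR_CONVERGENT);
            ac_build_kill_if_false(ctx, accept);
    }

The float encoding survives for now inside ac_build_kill_if_false, which
still lowers to the legacy llvm.AMDGPU.kill; callers simply stop dealing
with it.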
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 949f181..752c42e 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1398,34 +1398,27 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
v2f16, args, 2,
AC_FUNC_ATTR_READNONE);
return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
}
return ac_build_intrinsic(ctx, "llvm.SI.packf16", ctx->i32, args, 2,
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_LEGACY);
}
-/**
- * KILL, AKA discard in GLSL.
- *
- * \param value kill if value < 0.0 or value == NULL.
- */
-void ac_build_kill(struct ac_llvm_context *ctx, LLVMValueRef value)
+void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
{
- if (value) {
- ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt,
- &value, 1, AC_FUNC_ATTR_LEGACY);
- } else {
- ac_build_intrinsic(ctx, "llvm.AMDGPU.kilp", ctx->voidt,
- NULL, 0, AC_FUNC_ATTR_LEGACY);
- }
+ LLVMValueRef value = LLVMBuildSelect(ctx->builder, i1,
+ LLVMConstReal(ctx->f32, 1),
+ LLVMConstReal(ctx->f32, -1), "");
+ ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt,
+ &value, 1, AC_FUNC_ATTR_LEGACY);
}
LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
LLVMValueRef offset, LLVMValueRef width,
bool is_signed)
{
LLVMValueRef args[] = {
input,
offset,
width,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index f0b5875..b721782 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -258,21 +258,21 @@ struct ac_image_args {
LLVMValueRef addr;
unsigned dmask;
bool unorm;
bool da;
};
LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
struct ac_image_args *a);
LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
LLVMValueRef args[2]);
-void ac_build_kill(struct ac_llvm_context *ctx, LLVMValueRef value);
+void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
LLVMValueRef offset, LLVMValueRef width,
bool is_signed);
void ac_get_image_intr_name(const char *base_name,
LLVMTypeRef data_type,
LLVMTypeRef coords_type,
LLVMTypeRef rsrc_type,
char *out_name, unsigned out_len);
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 3ba3ebf..c1490a5 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3614,28 +3614,24 @@ static void emit_barrier(struct nir_to_llvm_context *ctx)
}
ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier",
ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
}
static void emit_discard_if(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
LLVMValueRef cond;
- cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
+ cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
get_src(ctx, instr->src[0]),
ctx->ac.i32_0, "");
-
- cond = LLVMBuildSelect(ctx->ac.builder, cond,
- LLVMConstReal(ctx->ac.f32, -1.0f),
- ctx->ac.f32_0, "");
- ac_build_kill(&ctx->ac, cond);
+ ac_build_kill_if_false(&ctx->ac, cond);
}
static LLVMValueRef
visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
{
LLVMValueRef result;
LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
LLVMConstInt(ctx->i32, 0xfc0, false), "");
@@ -3856,41 +3852,37 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
}
}
return ac_build_gather_values(&ctx->ac, result, 2);
}
static void
visit_emit_vertex(struct nir_to_llvm_context *ctx,
const nir_intrinsic_instr *instr)
{
LLVMValueRef gs_next_vertex;
- LLVMValueRef can_emit, kill;
+ LLVMValueRef can_emit;
int idx;
assert(instr->const_index[0] == 0);
/* Write vertex attribute values to GSVS ring */
gs_next_vertex = LLVMBuildLoad(ctx->builder,
ctx->gs_next_vertex,
"");
/* If this thread has already emitted the declared maximum number of
* vertices, kill it: excessive vertex emissions are not supposed to
* have any effect, and GS threads have no externally observable
* effects other than emitting vertices.
*/
can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
LLVMConstInt(ctx->i32, ctx->gs_max_out_vertices, false), "");
-
- kill = LLVMBuildSelect(ctx->builder, can_emit,
- LLVMConstReal(ctx->f32, 1.0f),
- LLVMConstReal(ctx->f32, -1.0f), "");
- ac_build_kill(&ctx->ac, kill);
+ ac_build_kill_if_false(&ctx->ac, can_emit);
/* loop num outputs */
idx = 0;
for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
int length = 4;
int slot = idx;
int slot_inc = 1;
if (!(ctx->output_mask & (1ull << i)))
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c3fe13d..8abacac 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2265,36 +2265,38 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
break;
}
}
static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
LLVMValueRef alpha)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
if (ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_NEVER) {
+ static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] = {
+ [PIPE_FUNC_LESS] = LLVMRealOLT,
+ [PIPE_FUNC_EQUAL] = LLVMRealOEQ,
+ [PIPE_FUNC_LEQUAL] = LLVMRealOLE,
+ [PIPE_FUNC_GREATER] = LLVMRealOGT,
+ [PIPE_FUNC_NOTEQUAL] = LLVMRealONE,
+ [PIPE_FUNC_GEQUAL] = LLVMRealOGE,
+ };
+ LLVMRealPredicate cond = cond_map[ctx->shader->key.part.ps.epilog.alpha_func];
+ assert(cond);
+
LLVMValueRef alpha_ref = LLVMGetParam(ctx->main_fn,
SI_PARAM_ALPHA_REF);
-
LLVMValueRef alpha_pass =
- lp_build_cmp(&bld_base->base,
- ctx->shader->key.part.ps.epilog.alpha_func,
- alpha, alpha_ref);
- LLVMValueRef arg =
- lp_build_select(&bld_base->base,
- alpha_pass,
- LLVMConstReal(ctx->f32, 1.0f),
- LLVMConstReal(ctx->f32, -1.0f));
-
- ac_build_kill(&ctx->ac, arg);
+ LLVMBuildFCmp(ctx->ac.builder, cond, alpha, alpha_ref, "");
+ ac_build_kill_if_false(&ctx->ac, alpha_pass);
} else {
- ac_build_kill(&ctx->ac, NULL);
+ ac_build_kill_if_false(&ctx->ac, LLVMConstInt(ctx->i1, 0, 0));
}
}
static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
LLVMValueRef alpha,
unsigned samplemask_param)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef coverage;
@@ -3566,21 +3568,21 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi,
struct si_shader *shader = ctx->shader;
struct tgsi_shader_info *info = &shader->selector->info;
LLVMBuilderRef builder = ctx->ac.builder;
unsigned i, j, first_vgpr, vgpr;
LLVMValueRef color[8][4] = {};
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
LLVMValueRef ret;
if (ctx->postponed_kill)
- ac_build_kill(&ctx->ac, LLVMBuildLoad(builder, ctx->postponed_kill, ""));
+ ac_build_kill_if_false(&ctx->ac, LLVMBuildLoad(builder, ctx->postponed_kill, ""));
/* Read the output values. */
for (i = 0; i < info->num_outputs; i++) {
unsigned semantic_name = info->output_semantic_name[i];
unsigned semantic_index = info->output_semantic_index[i];
switch (semantic_name) {
case TGSI_SEMANTIC_COLOR:
assert(semantic_index < 8);
for (j = 0; j < 4; j++) {
@@ -4049,21 +4051,21 @@ static void si_llvm_emit_vertex(
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct lp_build_context *uint = &bld_base->uint_bld;
struct si_shader *shader = ctx->shader;
struct tgsi_shader_info *info = &shader->selector->info;
struct lp_build_if_state if_state;
LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
ctx->param_gs2vs_offset);
LLVMValueRef gs_next_vertex;
- LLVMValueRef can_emit, kill;
+ LLVMValueRef can_emit;
unsigned chan, offset;
int i;
unsigned stream;
stream = si_llvm_get_stream(bld_base, emit_data);
/* Write vertex attribute values to GSVS ring */
gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
ctx->gs_next_vertex[stream],
"");
@@ -4075,25 +4077,21 @@ static void si_llvm_emit_vertex(
* If the shader has no writes to memory, kill it instead. This skips
* further memory loads and may allow LLVM to skip to the end
* altogether.
*/
can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex,
LLVMConstInt(ctx->i32,
shader->selector->gs_max_out_vertices, 0), "");
bool use_kill = !info->writes_memory;
if (use_kill) {
- kill = lp_build_select(&bld_base->base, can_emit,
- LLVMConstReal(ctx->f32, 1.0f),
- LLVMConstReal(ctx->f32, -1.0f));
-
- ac_build_kill(&ctx->ac, kill);
+ ac_build_kill_if_false(&ctx->ac, can_emit);
} else {
lp_build_if(&if_state, &ctx->gallivm, can_emit);
}
offset = 0;
for (i = 0; i < info->num_outputs; i++) {
LLVMValueRef *out_ptr = ctx->outputs[i];
for (chan = 0; chan < 4; chan++) {
if (!(info->output_usagemask[i] & (1 << chan)) ||
@@ -4874,25 +4872,21 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
slot = LLVMConstInt(ctx->i32, SI_PS_CONST_POLY_STIPPLE, 0);
desc = ac_build_load_to_sgpr(&ctx->ac, param_rw_buffers, slot);
/* The stipple pattern is 32x32, each row has 32 bits. */
offset = LLVMBuildMul(builder, address[1],
LLVMConstInt(ctx->i32, 4, 0), "");
row = buffer_load_const(ctx, desc, offset);
row = ac_to_integer(&ctx->ac, row);
bit = LLVMBuildLShr(builder, row, address[0], "");
bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");
-
- /* The intrinsic kills the thread if arg < 0. */
- bit = LLVMBuildSelect(builder, bit, LLVMConstReal(ctx->f32, 0),
- LLVMConstReal(ctx->f32, -1), "");
- ac_build_kill(&ctx->ac, bit);
+ ac_build_kill_if_false(&ctx->ac, bit);
}
void si_shader_binary_read_config(struct ac_shader_binary *binary,
struct si_shader_config *conf,
unsigned symbol_offset)
{
unsigned i;
const unsigned char *config =
ac_shader_binary_config_start(binary, symbol_offset);
bool really_needs_scratch = false;
@@ -5847,22 +5841,24 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
int i;
for (i = 0; i < 4; i++) {
ctx->gs_next_vertex[i] =
lp_build_alloca(&ctx->gallivm,
ctx->i32, "");
}
}
if (ctx->type == PIPE_SHADER_FRAGMENT && sel->info.uses_kill &&
ctx->screen->b.debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL)) {
- /* This is initialized to 0.0 = not kill. */
- ctx->postponed_kill = lp_build_alloca(&ctx->gallivm, ctx->f32, "");
+ ctx->postponed_kill = lp_build_alloca_undef(&ctx->gallivm, ctx->i1, "");
+ /* true = don't kill. */
+ LLVMBuildStore(ctx->ac.builder, LLVMConstInt(ctx->i1, 1, 0),
+ ctx->postponed_kill);
}
if (sel->tokens) {
if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
return false;
}
} else {
if (!si_nir_build_llvm(ctx, sel->nir)) {
fprintf(stderr, "Failed to translate shader from NIR to LLVM\n");
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index fc705c3..ad7a42f 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -32,67 +32,57 @@ static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
const struct tgsi_full_instruction *inst = emit_data->inst;
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMBuilderRef builder = ctx->ac.builder;
unsigned i;
LLVMValueRef conds[TGSI_NUM_CHANNELS];
for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
- conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
+ conds[i] = LLVMBuildFCmp(builder, LLVMRealOGE, value,
ctx->ac.f32_0, "");
}
- /* Or the conditions together */
+ /* And the conditions together */
for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
- conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
+ conds[i - 1] = LLVMBuildAnd(builder, conds[i], conds[i - 1], "");
}
emit_data->dst_type = ctx->voidt;
emit_data->arg_count = 1;
- emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
- LLVMConstReal(ctx->f32, -1.0f),
- ctx->ac.f32_0, "");
+ emit_data->args[0] = conds[0];
}
static void kil_emit(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef visible;
+
+ if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
+ visible = emit_data->args[0];
+ } else {
+ assert(emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL);
+ visible = LLVMConstInt(ctx->i1, false, 0);
+ }
if (ctx->postponed_kill) {
- if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
- LLVMValueRef val;
-
- /* Take the minimum kill value. This is the same as OR
- * between 2 kill values. If the value is negative,
- * the pixel will be killed.
- */
- val = LLVMBuildLoad(builder, ctx->postponed_kill, "");
- val = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MIN,
- val, emit_data->args[0]);
- LLVMBuildStore(builder, val, ctx->postponed_kill);
- } else {
- LLVMBuildStore(builder,
- LLVMConstReal(ctx->f32, -1),
- ctx->postponed_kill);
- }
+ LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, "");
+ mask = LLVMBuildAnd(builder, mask, visible, "");
+ LLVMBuildStore(builder, mask, ctx->postponed_kill);
return;
}
- if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF)
- ac_build_kill(&ctx->ac, emit_data->args[0]);
- else
- ac_build_kill(&ctx->ac, NULL);
+ ac_build_kill_if_false(&ctx->ac, visible);
}
static void emit_icmp(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
unsigned pred;
struct si_shader_context *ctx = si_shader_context(bld_base);
switch (emit_data->inst->Instruction.Opcode) {
--
2.7.4