[Mesa-dev] [PATCH] nir: optimize gl_SampleMaskIn to gl_HelperInvocation for radeonsi when possible
Marek Olšák
maraeo at gmail.com
Wed Apr 10 02:03:21 UTC 2019
From: Marek Olšák <marek.olsak at amd.com>
---
src/compiler/nir/nir.h | 8 +++++
src/compiler/nir/nir_opt_intrinsics.c | 40 +++++++++++++++++++++--
src/gallium/drivers/radeonsi/si_get.c | 1 +
src/mesa/state_tracker/st_glsl_to_nir.cpp | 1 +
4 files changed, 48 insertions(+), 2 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 09950bf3398..d3874705235 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2283,20 +2283,28 @@ typedef struct nir_shader_compiler_options {
*
* This depends on some possibly hw implementation details, which may
* not be true for all hw. In particular that the FS is only executed
* for covered samples or for helper invocations. So, do not blindly
* enable this option.
*
* Note: See also issue #22 in ARB_shader_image_load_store
*/
bool lower_helper_invocation;
+ /**
+ * Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
+ *
+ * gl_SampleMaskIn == 0 ---> gl_HelperInvocation
+ * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
+ */
+ bool optimize_sample_mask_in;
+
bool lower_cs_local_index_from_id;
bool lower_cs_local_id_from_index;
bool lower_device_index_to_zero;
/* Set if nir_lower_wpos_ytransform() should also invert gl_PointCoord. */
bool lower_wpos_pntc;
bool lower_hadd;
bool lower_add_sat;
diff --git a/src/compiler/nir/nir_opt_intrinsics.c b/src/compiler/nir/nir_opt_intrinsics.c
index 7b054faa204..2f9e4e466c9 100644
--- a/src/compiler/nir/nir_opt_intrinsics.c
+++ b/src/compiler/nir/nir_opt_intrinsics.c
@@ -22,21 +22,22 @@
*/
#include "nir.h"
#include "nir_builder.h"
/**
* \file nir_opt_intrinsics.c
*/
static bool
-opt_intrinsics_impl(nir_function_impl *impl)
+opt_intrinsics_impl(nir_function_impl *impl,
+ const struct nir_shader_compiler_options *options)
{
nir_builder b;
nir_builder_init(&b, impl);
bool progress = false;
nir_foreach_block(block, impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
@@ -48,20 +49,55 @@ opt_intrinsics_impl(nir_function_impl *impl)
case nir_intrinsic_vote_any:
case nir_intrinsic_vote_all:
if (nir_src_is_const(intrin->src[0]))
replacement = nir_ssa_for_src(&b, intrin->src[0], 1);
break;
case nir_intrinsic_vote_feq:
case nir_intrinsic_vote_ieq:
if (nir_src_is_const(intrin->src[0]))
replacement = nir_imm_true(&b);
break;
+case nir_intrinsic_load_sample_mask_in:
+/* Transform comparisons of the sample mask against constant 0:
+ * gl_SampleMaskIn == 0 ---> gl_HelperInvocation
+ * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
+ */
+if (!options->optimize_sample_mask_in)
+continue;
+
+nir_foreach_use_safe(use_src, &intrin->dest.ssa) {
+if (use_src->parent_instr->type == nir_instr_type_alu) {
+nir_alu_instr *alu = nir_instr_as_alu(use_src->parent_instr);
+
+if (alu->op == nir_op_ieq ||
+alu->op == nir_op_ine) {
+/* Check for 0 in either operand. */
+nir_const_value *const_val =
+nir_src_as_const_value(alu->src[0].src);
+if (!const_val)
+const_val = nir_src_as_const_value(alu->src[1].src);
+if (!const_val || const_val->i32[0] != 0)
+continue;
+
+nir_ssa_def *new_expr = nir_load_helper_invocation(&b, 1);
+
+if (alu->op == nir_op_ine) /* same opcode as accepted above, not ine32 */
+new_expr = nir_inot(&b, new_expr);
+
+nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
+nir_src_for_ssa(new_expr));
+nir_instr_remove(&alu->instr);
+progress = true; /* this case bypasses the generic replacement path */
+}
+}
+}
+continue;
default:
break;
}
if (!replacement)
continue;
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
nir_src_for_ssa(replacement));
nir_instr_remove(instr);
@@ -74,19 +110,19 @@ opt_intrinsics_impl(nir_function_impl *impl)
bool
nir_opt_intrinsics(nir_shader *shader)
{
bool progress = false;
nir_foreach_function(function, shader) {
if (!function->impl)
continue;
- if (opt_intrinsics_impl(function->impl)) {
+ if (opt_intrinsics_impl(function->impl, shader->options)) {
progress = true;
nir_metadata_preserve(function->impl, nir_metadata_block_index |
nir_metadata_dominance);
}
}
return progress;
}
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 58b56b34d13..2142d5a33f2 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -494,20 +494,21 @@ static const struct nir_shader_compiler_options nir_options = {
.lower_pack_snorm_2x16 = true,
.lower_pack_snorm_4x8 = true,
.lower_pack_unorm_2x16 = true,
.lower_pack_unorm_4x8 = true,
.lower_unpack_snorm_2x16 = true,
.lower_unpack_snorm_4x8 = true,
.lower_unpack_unorm_2x16 = true,
.lower_unpack_unorm_4x8 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .optimize_sample_mask_in = true,
.max_unroll_iterations = 32,
.native_integers = true,
};
static const void *
si_get_compiler_options(struct pipe_screen *screen,
enum pipe_shader_ir ir,
enum pipe_shader_type shader)
{
assert(ir == PIPE_SHADER_IR_NIR);
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index fb10869c9f9..8028ccf7110 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -497,20 +497,21 @@ st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog,
/* This has to be done last. Any operation the can cause
* prog->ParameterValues to get reallocated (e.g., anything that adds a
* program constant) has to happen before creating this linkage.
*/
_mesa_associate_uniform_storage(st->ctx, shader_program, prog, true);
st_set_prog_affected_state_flags(prog);
NIR_PASS_V(nir, st_nir_lower_builtin);
NIR_PASS_V(nir, gl_nir_lower_atomics, shader_program, true);
+ NIR_PASS_V(nir, nir_opt_intrinsics);
nir_variable_mode mask = nir_var_function_temp;
nir_remove_dead_variables(nir, mask);
if (st->ctx->_Shader->Flags & GLSL_DUMP) {
_mesa_log("\n");
_mesa_log("NIR IR for linked %s program %d:\n",
_mesa_shader_stage_to_string(prog->info.stage),
shader_program->Name);
nir_print_shader(nir, _mesa_get_log_file());
--
2.17.1
More information about the mesa-dev
mailing list