[Mesa-dev] [PATCH] nir: optimize gl_SampleMaskIn to gl_HelperInvocation for radeonsi when possible
Marek Olšák
maraeo at gmail.com
Tue Apr 16 03:14:14 UTC 2019
On Mon, Apr 15, 2019, 10:41 PM Timothy Arceri <tarceri at itsqueeze.com> wrote:
>
>
> On 16/4/19 7:03 am, Marek Olšák wrote:
> > ping
> >
> > On Tue, Apr 9, 2019 at 10:03 PM Marek Olšák <maraeo at gmail.com
> > <mailto:maraeo at gmail.com>> wrote:
> >
> > From: Marek Olšák <marek.olsak at amd.com <mailto:marek.olsak at amd.com>>
> >
> > ---
> > src/compiler/nir/nir.h | 8 +++++
> > src/compiler/nir/nir_opt_intrinsics.c | 40
> +++++++++++++++++++++--
> > src/gallium/drivers/radeonsi/si_get.c | 1 +
> > src/mesa/state_tracker/st_glsl_to_nir.cpp | 1 +
> > 4 files changed, 48 insertions(+), 2 deletions(-)
> >
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index 09950bf3398..d3874705235 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -2283,20 +2283,28 @@ typedef struct nir_shader_compiler_options {
> > *
> > * This depends on some possibly hw implementation details,
> > which may
> > * not be true for all hw. In particular that the FS is only
> > executed
> > * for covered samples or for helper invocations. So, do not
> > blindly
> > * enable this option.
> > *
> > * Note: See also issue #22 in ARB_shader_image_load_store
> > */
> > bool lower_helper_invocation;
> >
> > + /**
> > + * Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
> > + *
> > + * gl_SampleMaskIn == 0 ---> gl_HelperInvocation
> > + * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
> > + */
> > + bool optimize_sample_mask_in;
> > +
> > bool lower_cs_local_index_from_id;
> > bool lower_cs_local_id_from_index;
> >
> > bool lower_device_index_to_zero;
> >
> > /* Set if nir_lower_wpos_ytransform() should also invert
> > gl_PointCoord. */
> > bool lower_wpos_pntc;
> >
> > bool lower_hadd;
> > bool lower_add_sat;
> > diff --git a/src/compiler/nir/nir_opt_intrinsics.c
> > b/src/compiler/nir/nir_opt_intrinsics.c
> > index 7b054faa204..2f9e4e466c9 100644
> > --- a/src/compiler/nir/nir_opt_intrinsics.c
> > +++ b/src/compiler/nir/nir_opt_intrinsics.c
> > @@ -22,21 +22,22 @@
> > */
> >
> > #include "nir.h"
> > #include "nir_builder.h"
> >
> > /**
> > * \file nir_opt_intrinsics.c
> > */
> >
> > static bool
> > -opt_intrinsics_impl(nir_function_impl *impl)
> > +opt_intrinsics_impl(nir_function_impl *impl,
> > + const struct nir_shader_compiler_options
> *options)
> > {
> > nir_builder b;
> > nir_builder_init(&b, impl);
> > bool progress = false;
> >
> > nir_foreach_block(block, impl) {
> > nir_foreach_instr_safe(instr, block) {
> > if (instr->type != nir_instr_type_intrinsic)
> > continue;
> >
> > @@ -48,20 +49,55 @@ opt_intrinsics_impl(nir_function_impl *impl)
> > case nir_intrinsic_vote_any:
> > case nir_intrinsic_vote_all:
> > if (nir_src_is_const(intrin->src[0]))
> > replacement = nir_ssa_for_src(&b, intrin->src[0],
> 1);
> > break;
> > case nir_intrinsic_vote_feq:
> > case nir_intrinsic_vote_ieq:
> > if (nir_src_is_const(intrin->src[0]))
> > replacement = nir_imm_true(&b);
> > break;
> > + case nir_intrinsic_load_sample_mask_in:
> > + /* Transform:
> > + * gl_SampleMaskIn == 0 ---> gl_HelperInvocation
> > + * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
> > + */
> > + if (!options->optimize_sample_mask_in)
> > + continue;
> > +
> > + nir_foreach_use_safe(use_src, &intrin->dest.ssa) {
> > + if (use_src->parent_instr->type ==
> nir_instr_type_alu) {
> > + nir_alu_instr *alu =
> > nir_instr_as_alu(use_src->parent_instr);
> > +
> > + if (alu->op == nir_op_ieq ||
> > + alu->op == nir_op_ine) {
> > + /* Check for 0 in either operand. */
> > + nir_const_value *const_val =
> > + nir_src_as_const_value(alu->src[0].src);
> > + if (!const_val)
> > + const_val =
> > nir_src_as_const_value(alu->src[1].src);
> > + if (!const_val || const_val->i32[0] != 0)
> > + continue;
> > +
> > + nir_ssa_def *new_expr =
> > nir_load_helper_invocation(&b, 1);
> > +
> > + if (alu->op == nir_op_ine32)
>
> How can this be nir_op_ine32 when the outer if condition is:
>
> alu->op == nir_op_ieq || alu->op == nir_op_ine
>
> ??
>
Yeah, that's a leftover from the previous version that used the 32 suffix
everywhere and didn't work.
Marek
> > + new_expr = nir_inot(&b, new_expr);
> > +
> > + nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
> > +
> > nir_src_for_ssa(new_expr));
> > + nir_instr_remove(&alu->instr);
> > + continue;
> > + }
> > + }
> > + }
> > + continue;
> > default:
> > break;
> > }
> >
> > if (!replacement)
> > continue;
> >
> > nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
> > nir_src_for_ssa(replacement));
> > nir_instr_remove(instr);
> > @@ -74,19 +110,19 @@ opt_intrinsics_impl(nir_function_impl *impl)
> >
> > bool
> > nir_opt_intrinsics(nir_shader *shader)
> > {
> > bool progress = false;
> >
> > nir_foreach_function(function, shader) {
> > if (!function->impl)
> > continue;
> >
> > - if (opt_intrinsics_impl(function->impl)) {
> > + if (opt_intrinsics_impl(function->impl, shader->options)) {
> > progress = true;
> > nir_metadata_preserve(function->impl,
> > nir_metadata_block_index |
> >
> > nir_metadata_dominance);
> > }
> > }
> >
> > return progress;
> > }
> > diff --git a/src/gallium/drivers/radeonsi/si_get.c
> > b/src/gallium/drivers/radeonsi/si_get.c
> > index 58b56b34d13..2142d5a33f2 100644
> > --- a/src/gallium/drivers/radeonsi/si_get.c
> > +++ b/src/gallium/drivers/radeonsi/si_get.c
> > @@ -494,20 +494,21 @@ static const struct
> > nir_shader_compiler_options nir_options = {
> > .lower_pack_snorm_2x16 = true,
> > .lower_pack_snorm_4x8 = true,
> > .lower_pack_unorm_2x16 = true,
> > .lower_pack_unorm_4x8 = true,
> > .lower_unpack_snorm_2x16 = true,
> > .lower_unpack_snorm_4x8 = true,
> > .lower_unpack_unorm_2x16 = true,
> > .lower_unpack_unorm_4x8 = true,
> > .lower_extract_byte = true,
> > .lower_extract_word = true,
> > + .optimize_sample_mask_in = true,
> > .max_unroll_iterations = 32,
> > .native_integers = true,
> > };
> >
> > static const void *
> > si_get_compiler_options(struct pipe_screen *screen,
> > enum pipe_shader_ir ir,
> > enum pipe_shader_type shader)
> > {
> > assert(ir == PIPE_SHADER_IR_NIR);
> > diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp
> > b/src/mesa/state_tracker/st_glsl_to_nir.cpp
> > index fb10869c9f9..8028ccf7110 100644
> > --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
> > +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
> > @@ -497,20 +497,21 @@ st_glsl_to_nir_post_opts(struct st_context
> > *st, struct gl_program *prog,
> > /* This has to be done last. Any operation the can cause
> > * prog->ParameterValues to get reallocated (e.g., anything
> > that adds a
> > * program constant) has to happen before creating this linkage.
> > */
> > _mesa_associate_uniform_storage(st->ctx, shader_program, prog,
> > true);
> >
> > st_set_prog_affected_state_flags(prog);
> >
> > NIR_PASS_V(nir, st_nir_lower_builtin);
> > NIR_PASS_V(nir, gl_nir_lower_atomics, shader_program, true);
> > + NIR_PASS_V(nir, nir_opt_intrinsics);
> >
> > nir_variable_mode mask = nir_var_function_temp;
> > nir_remove_dead_variables(nir, mask);
> >
> > if (st->ctx->_Shader->Flags & GLSL_DUMP) {
> > _mesa_log("\n");
> > _mesa_log("NIR IR for linked %s program %d:\n",
> > _mesa_shader_stage_to_string(prog->info.stage),
> > shader_program->Name);
> > nir_print_shader(nir, _mesa_get_log_file());
> > --
> > 2.17.1
> >
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190415/88f76814/attachment-0001.html>
More information about the mesa-dev
mailing list