<p dir="ltr"><br>
On Dec 11, 2015 1:24 PM, "Kenneth Graunke" <<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>> wrote:<br>
><br>
> Tessellation control shaders need to be careful when writing outputs.<br>
> Because multiple threads can concurrently write the same output<br>
> variables, we need to only write the exact components we were told.<br>
><br>
> Traditionally, for sub-vector writes, we've read the whole vector,<br>
> updated the temporary, and written the whole vector back.  This breaks<br>
> down with concurrent access.<br>
><br>
> This patch prepares the way for a solution by adding a writemask field<br>
> to store_var intrinsics, as well as the other store intrinsics.  It then<br>
> updates all produces to emit a writemask of "all channels enabled".  It<br>
> updates nir_lower_io to copy the writemask to output store intrinsics.<br>
><br>
> Finally, it updates nir_lower_vars_to_ssa to handle partial writemasks<br>
> by doing a read-modify-write cycle (which is safe, because local<br>
> variables are specific to a single thread).<br>
><br>
> This should have no functional change, since no one actually emits<br>
> partial writemasks yet.<br>
><br>
> v2: Make nir_validate momentarily assert that writemasks cover the<br>
>     complete value - we shouldn't have partial writemasks yet<br>
>     (requested by Jason Ekstrand).<br>
><br>
> Signed-off-by: Kenneth Graunke <<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>><br>
> ---<br>
>  src/gallium/auxiliary/nir/tgsi_to_nir.c            |  1 +<br>
>  .../drivers/freedreno/ir3/ir3_compiler_nir.c       |  6 +++<br>
>  src/glsl/nir/glsl_to_nir.cpp                       |  2 +<br>
>  src/glsl/nir/nir_builder.h                         |  4 +-<br>
>  src/glsl/nir/nir_intrinsics.h                      | 12 +++---<br>
>  src/glsl/nir/nir_lower_gs_intrinsics.c             |  3 +-<br>
>  src/glsl/nir/nir_lower_io.c                        |  3 ++<br>
>  src/glsl/nir/nir_lower_locals_to_regs.c            |  2 +-<br>
>  src/glsl/nir/nir_lower_var_copies.c                |  1 +<br>
>  src/glsl/nir/nir_lower_vars_to_ssa.c               | 46 ++++++++++++++++------<br>
>  src/glsl/nir/nir_validate.c                        |  2 +<br>
>  src/mesa/program/prog_to_nir.c                     |  2 +<br>
>  12 files changed, 64 insertions(+), 20 deletions(-)<br>
><br>
> diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c<br>
> index 5def6d3..c9aa049 100644<br>
> --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c<br>
> +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c<br>
> @@ -1901,6 +1901,7 @@ ttn_emit_instruction(struct ttn_compile *c)<br>
>                                             &tgsi_dst->Indirect : NULL;<br>
><br>
>        store->num_components = 4;<br>
> +      store->const_index[0] = 0xf;<br>
>        store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);<br>
>        store->src[0] = nir_src_for_reg(dest.dest.reg.reg);<br>
><br>
> diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c<br>
> index eea5c5e..6154a10 100644<br>
> --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c<br>
> +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c<br>
> @@ -1321,6 +1321,9 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)<br>
>         case nir_deref_array_type_direct:<br>
>                 /* direct access does not require anything special: */<br>
>                 for (int i = 0; i < intr->num_components; i++) {<br>
> +                       if (!(intr->const_index[0] & (1 << i)))<br>
> +                               continue;<br>
> +<br>
>                         unsigned n = darr->base_offset * 4 + i;<br>
>                         compile_assert(ctx, n < arr->length);<br>
>                         arr->arr[n] = src[i];<br>
> @@ -1333,6 +1336,9 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)<br>
>                 struct ir3_instruction *addr =<br>
>                                 get_addr(ctx, get_src(ctx, &darr->indirect)[0]);<br>
>                 for (int i = 0; i < intr->num_components; i++) {<br>
> +                       if (!(intr->const_index[0] & (1 << i)))<br>
> +                               continue;<br>
> +<br>
>                         struct ir3_instruction *store;<br>
>                         unsigned n = darr->base_offset * 4 + i;<br>
>                         compile_assert(ctx, n < arr->length);<br>
> diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp<br>
> index db8b0ca..14f3153 100644<br>
> --- a/src/glsl/nir/glsl_to_nir.cpp<br>
> +++ b/src/glsl/nir/glsl_to_nir.cpp<br>
> @@ -1068,6 +1068,7 @@ nir_visitor::visit(ir_call *ir)<br>
>           nir_intrinsic_instr *store_instr =<br>
>              nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);<br>
>           store_instr->num_components = ir->return_deref->type->vector_elements;<br>
> +         store_instr->const_index[0] = (1 << store_instr->num_components) - 1;<br>
><br>
>           store_instr->variables[0] =<br>
>              evaluate_deref(&store_instr->instr, ir->return_deref);<br>
> @@ -1166,6 +1167,7 @@ nir_visitor::visit(ir_assignment *ir)<br>
>     nir_intrinsic_instr *store =<br>
>        nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);<br>
>     store->num_components = ir->lhs->type->vector_elements;<br>
> +   store->const_index[0] = (1 << store->num_components) - 1;<br>
>     nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);<br>
>     store->variables[0] = nir_deref_as_var(store_deref);<br>
>     store->src[0] = nir_src_for_ssa(src);<br>
> diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h<br>
> index b909f483..6478aad 100644<br>
> --- a/src/glsl/nir/nir_builder.h<br>
> +++ b/src/glsl/nir/nir_builder.h<br>
> @@ -310,13 +310,15 @@ nir_load_var(nir_builder *build, nir_variable *var)<br>
>  }<br>
><br>
>  static inline void<br>
> -nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value)<br>
> +nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value,<br>
> +              unsigned writemask)<br>
>  {<br>
>     const unsigned num_components = glsl_get_vector_elements(var->type);<br>
><br>
>     nir_intrinsic_instr *store =<br>
>        nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);<br>
>     store->num_components = num_components;<br>
> +   store->const_index[0] = writemask;<br>
>     store->variables[0] = nir_deref_var_create(store, var);<br>
>     store->src[0] = nir_src_for_ssa(value);<br>
>     nir_builder_instr_insert(build, &store->instr);<br>
> diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h<br>
> index 9811fb3..b0cbd2f 100644<br>
> --- a/src/glsl/nir/nir_intrinsics.h<br>
> +++ b/src/glsl/nir/nir_intrinsics.h<br>
> @@ -43,7 +43,7 @@<br>
><br>
><br>
>  INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)<br>
> -INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0)<br>
> +INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0)<br>
>  INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)<br>
><br>
>  /*<br>
> @@ -302,12 +302,12 @@ LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)<br>
>  #define STORE(name, srcs, indices, flags) \<br>
>     INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags)<br>
><br>
> -/* src[] = { value, offset }. const_index[] = { base } */<br>
> -STORE(output, 2, 1, 0)<br>
> -/* src[] = { value, vertex, offset }. const_index[] = { base } */<br>
> -STORE(per_vertex_output, 3, 1, 0)<br>
> +/* src[] = { value, offset }. const_index[] = { base, write_mask } */<br>
> +STORE(output, 2, 2, 0)<br>
> +/* src[] = { value, vertex, offset }. const_index[] = { base, write_mask } */<br>
> +STORE(per_vertex_output, 3, 2, 0)<br>
>  /* src[] = { value, block_index, offset }. const_index[] = { write_mask } */<br>
> -STORE(ssbo, 3, 1, 0)<br>
> +STORE(ssbo, 3, 2, 0)</p>
<p dir="ltr">I don't think you meant to change SSBOs.</p>
<p dir="ltr">>  /* src[] = { value, offset }. const_index[] = { base, write_mask } */<br>
>  STORE(shared, 2, 1, 0)<br>
><br>
> diff --git a/src/glsl/nir/nir_lower_gs_intrinsics.c b/src/glsl/nir/nir_lower_gs_intrinsics.c<br>
> index e0d0678..1325459 100644<br>
> --- a/src/glsl/nir/nir_lower_gs_intrinsics.c<br>
> +++ b/src/glsl/nir/nir_lower_gs_intrinsics.c<br>
> @@ -99,7 +99,8 @@ rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state)<br>
><br>
>     /* Increment the vertex count by 1 */<br>
>     nir_store_var(b, state->vertex_count_var,<br>
> -                 nir_iadd(b, count, nir_imm_int(b, 1)));<br>
> +                 nir_iadd(b, count, nir_imm_int(b, 1)),<br>
> +                 0x1); /* .x */<br>
><br>
>     nir_instr_remove(&intrin->instr);<br>
><br>
> diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c<br>
> index 3d646eb..a3565cc 100644<br>
> --- a/src/glsl/nir/nir_lower_io.c<br>
> +++ b/src/glsl/nir/nir_lower_io.c<br>
> @@ -261,6 +261,9 @@ nir_lower_io_block(nir_block *block, void *void_state)<br>
>           store->const_index[0] =<br>
>              intrin->variables[0]->var->data.driver_location;<br>
><br>
> +         /* Copy the writemask */<br>
> +         store->const_index[1] = intrin->const_index[0];<br>
> +<br>
>           if (per_vertex)<br>
>              store->src[1] = nir_src_for_ssa(vertex_index);<br>
><br>
> diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c b/src/glsl/nir/nir_lower_locals_to_regs.c<br>
> index 17b53ca..3e21ac0 100644<br>
> --- a/src/glsl/nir/nir_lower_locals_to_regs.c<br>
> +++ b/src/glsl/nir/nir_lower_locals_to_regs.c<br>
> @@ -243,7 +243,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)<br>
><br>
>           nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);<br>
>           nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);<br>
> -         mov->dest.write_mask = (1 << intrin->num_components) - 1;<br>
> +         mov->dest.write_mask = intrin->const_index[0];<br>
>           mov->dest.dest.is_ssa = false;<br>
>           mov->dest.dest.reg.reg = reg_src.reg.reg;<br>
>           mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;<br>
> diff --git a/src/glsl/nir/nir_lower_var_copies.c b/src/glsl/nir/nir_lower_var_copies.c<br>
> index 98c107a..a9017de 100644<br>
> --- a/src/glsl/nir/nir_lower_var_copies.c<br>
> +++ b/src/glsl/nir/nir_lower_var_copies.c<br>
> @@ -128,6 +128,7 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,<br>
>        nir_intrinsic_instr *store =<br>
>           nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);<br>
>        store->num_components = num_components;<br>
> +      store->const_index[0] = (1 << num_components) - 1;<br>
>        store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));<br>
><br>
>        store->src[0].is_ssa = true;<br>
> diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c b/src/glsl/nir/nir_lower_vars_to_ssa.c<br>
> index e670dbd..3ec0e1d 100644<br>
> --- a/src/glsl/nir/nir_lower_vars_to_ssa.c<br>
> +++ b/src/glsl/nir/nir_lower_vars_to_ssa.c<br>
> @@ -26,6 +26,7 @@<br>
>   */<br>
><br>
>  #include "nir.h"<br>
> +#include "nir_builder.h"<br>
>  #include "nir_vla.h"<br>
><br>
><br>
> @@ -590,6 +591,9 @@ add_phi_sources(nir_block *block, nir_block *pred,<br>
>  static bool<br>
>  rename_variables_block(nir_block *block, struct lower_variables_state *state)<br>
>  {<br>
> +   nir_builder b;<br>
> +   nir_builder_init(&b, state->impl);<br>
> +<br>
>     nir_foreach_instr_safe(block, instr) {<br>
>        if (instr->type == nir_instr_type_phi) {<br>
>           nir_phi_instr *phi = nir_instr_as_phi(instr);<br>
> @@ -675,20 +679,40 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)<br>
><br>
>              assert(intrin->src[0].is_ssa);<br>
><br>
> -            nir_alu_instr *mov = nir_alu_instr_create(state->shader,<br>
> -                                                      nir_op_imov);<br>
> -            mov->src[0].src.is_ssa = true;<br>
> -            mov->src[0].src.ssa = intrin->src[0].ssa;<br>
> -            for (unsigned i = intrin->num_components; i < 4; i++)<br>
> -               mov->src[0].swizzle[i] = 0;<br>
> +            nir_ssa_def *new_def;<br>
> +            b.cursor = nir_before_instr(&intrin->instr);<br>
><br>
> -            mov->dest.write_mask = (1 << intrin->num_components) - 1;<br>
> -            nir_ssa_dest_init(&mov->instr, &mov->dest.dest,<br>
> -                              intrin->num_components, NULL);<br>
> +            if (intrin->const_index[0] == (1 << intrin->num_components) - 1) {<br>
> +               /* Whole variable store - just copy the source.  Note that<br>
> +                * intrin->num_components and intrin->src[0].ssa->num_components<br>
> +                * may differ.<br>
> +                */<br>
> +               unsigned swiz[4];<br>
> +               for (unsigned i = 0; i < 4; i++)<br>
> +                  swiz[i] = i < intrin->num_components ? i : 0;<br>
> +<br>
> +               new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,<br>
> +                                     intrin->num_components, false);<br>
> +            } else {<br>
> +               nir_ssa_def *old_def = get_ssa_def_for_block(node, block, state);<br>
> +               /* For writemasked store_var intrinsics, we combine the newly<br>
> +                * written values with the existing contents of unwritten<br>
> +                * channels, creating a new SSA value for the whole vector.<br>
> +                */<br>
> +               nir_ssa_def *srcs[4];<br>
> +               for (unsigned i = 0; i < intrin->num_components; i++) {<br>
> +                  if (intrin->const_index[0] & (1 << i)) {<br>
> +                     srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);<br>
> +                  } else {<br>
> +                     srcs[i] = nir_channel(&b, old_def, i);<br>
> +                  }<br>
> +               }<br>
> +               new_def = nir_vec(&b, srcs, intrin->num_components);<br>
> +            }<br>
><br>
> -            nir_instr_insert_before(&intrin->instr, &mov->instr);<br>
> +            assert(new_def->num_components == intrin->num_components);<br>
><br>
> -            def_stack_push(node, &mov->dest.dest.ssa, state);<br>
> +            def_stack_push(node, new_def, state);<br>
><br>
>              /* We'll wait to remove the instruction until the next pass<br>
>               * where we pop the node we just pushed back off the stack.<br>
> diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c<br>
> index 06879d6..89cf0b8 100644<br>
> --- a/src/glsl/nir/nir_validate.c<br>
> +++ b/src/glsl/nir/nir_validate.c<br>
> @@ -417,6 +417,8 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)<br>
>        assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&<br>
>               instr->variables[0]->var->data.mode != nir_var_uniform &&<br>
>               instr->variables[0]->var->data.mode != nir_var_shader_storage);<br>
> +      /* Currently, writemasks must cover the entire value */<br>
> +      assert(instr->const_index[0] == (1 << instr->num_components) - 1);<br>
>        break;<br>
>     }<br>
>     case nir_intrinsic_copy_var:<br>
> diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c<br>
> index 539e3c0..48f47b8 100644<br>
> --- a/src/mesa/program/prog_to_nir.c<br>
> +++ b/src/mesa/program/prog_to_nir.c<br>
> @@ -927,6 +927,7 @@ ptn_add_output_stores(struct ptn_compile *c)<br>
>        nir_intrinsic_instr *store =<br>
>           nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);<br>
>        store->num_components = glsl_get_vector_elements(var->type);<br>
> +      store->const_index[0] = (1 << store->num_components) - 1;<br>
>        store->variables[0] =<br>
>           nir_deref_var_create(store, c->output_vars[var->data.location]);<br>
><br>
> @@ -997,6 +998,7 @@ setup_registers_and_variables(struct ptn_compile *c)<br>
>              nir_intrinsic_instr *store =<br>
>                 nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);<br>
>              store->num_components = 4;<br>
> +            store->const_index[0] = WRITEMASK_XYZW;<br>
>              store->variables[0] = nir_deref_var_create(store, fullvar);<br>
>              store->src[0] = nir_src_for_ssa(f001);<br>
>              nir_builder_instr_insert(b, &store->instr);<br>
> --<br>
> 2.6.3<br>
><br>
> _______________________________________________<br>
> mesa-dev mailing list<br>
><a href="mailto:mesa-dev@lists.freedesktop.org"> mesa-dev@lists.freedesktop.org</a><br>
><a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev"> http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</p>