[Mesa-dev] [PATCH 01/26] nir: Add a writemask to store intrinsics.

Kenneth Graunke kenneth at whitecape.org
Wed Dec 2 16:15:42 PST 2015


Tessellation control shaders need to be careful when writing outputs.
Because multiple threads can concurrently write the same output
variables, we need to only write the exact components we were told.

Traditionally, for sub-vector writes, we've read the whole vector,
updated the temporary, and written the whole vector back.  This breaks
down with concurrent access.

This patch prepares the way for a solution by adding a writemask field
to store_var intrinsics, as well as the other store intrinsics.  It then
updates all produces to emit a writemask of "all channels enabled".  It
updates nir_lower_io to copy the writemask to output store intrinsics.

Finally, it updates nir_lower_vars_to_ssa to handle partial writemasks
by doing a read-modify-write cycle (which is safe, because local
variables are specific to a single thread).

This should have no functional change, since no one actually emits
partial writemasks yet.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c            |  1 +
 .../drivers/freedreno/ir3/ir3_compiler_nir.c       |  6 +++
 src/glsl/nir/glsl_to_nir.cpp                       |  2 +
 src/glsl/nir/nir_builder.h                         |  4 +-
 src/glsl/nir/nir_intrinsics.h                      | 14 +++----
 src/glsl/nir/nir_lower_gs_intrinsics.c             |  3 +-
 src/glsl/nir/nir_lower_io.c                        |  2 +
 src/glsl/nir/nir_lower_locals_to_regs.c            |  2 +-
 src/glsl/nir/nir_lower_var_copies.c                |  1 +
 src/glsl/nir/nir_lower_vars_to_ssa.c               | 46 ++++++++++++++++------
 src/glsl/nir/nir_validate.c                        |  1 +
 src/mesa/program/prog_to_nir.c                     |  2 +
 12 files changed, 63 insertions(+), 21 deletions(-)

Technically, I don't think I need the ir3 changes here - it should work
without any changes.  I think I was just overzealously preparing things
to handle writemasks before I realized there shouldn't be any.

But, it might be worth doing anyway.  Not sure.

Jason and I have already talked about the fact that we're conflicting.
I like what he's doing, and he suggested this; we just need to figure out
what lands first.  I decided to send these out anyway for people to look
at and start reviewing, since there's a lot to do there.  We'll sort it
out in v2.

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 86c2ffa..f5547c8 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -1894,6 +1894,7 @@ ttn_emit_instruction(struct ttn_compile *c)
                                            &tgsi_dst->Indirect : NULL;
 
       store->num_components = 4;
+      store->const_index[0] = 0xf;
       store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);
       store->src[0] = nir_src_for_reg(dest.dest.reg.reg);
 
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 8617704..3beed0e 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1314,6 +1314,9 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 	case nir_deref_array_type_direct:
 		/* direct access does not require anything special: */
 		for (int i = 0; i < intr->num_components; i++) {
+			if (!(intr->const_index[0] & (1 << i)))
+				continue;
+
 			unsigned n = darr->base_offset * 4 + i;
 			compile_assert(ctx, n < arr->length);
 			arr->arr[n] = src[i];
@@ -1326,6 +1329,9 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 		struct ir3_instruction *addr =
 				get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
 		for (int i = 0; i < intr->num_components; i++) {
+			if (!(intr->const_index[0] & (1 << i)))
+				continue;
+
 			struct ir3_instruction *store;
 			unsigned n = darr->base_offset * 4 + i;
 			compile_assert(ctx, n < arr->length);
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 45d045c..5c84b0d 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -982,6 +982,7 @@ nir_visitor::visit(ir_call *ir)
          nir_intrinsic_instr *store_instr =
             nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
          store_instr->num_components = ir->return_deref->type->vector_elements;
+         store_instr->const_index[0] = (1 << store_instr->num_components) - 1;
 
          store_instr->variables[0] =
             evaluate_deref(&store_instr->instr, ir->return_deref);
@@ -1080,6 +1081,7 @@ nir_visitor::visit(ir_assignment *ir)
    nir_intrinsic_instr *store =
       nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
    store->num_components = ir->lhs->type->vector_elements;
+   store->const_index[0] = (1 << store->num_components) - 1;
    nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
    store->variables[0] = nir_deref_as_var(store_deref);
    store->src[0] = nir_src_for_ssa(src);
diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
index b909f483..6478aad 100644
--- a/src/glsl/nir/nir_builder.h
+++ b/src/glsl/nir/nir_builder.h
@@ -310,13 +310,15 @@ nir_load_var(nir_builder *build, nir_variable *var)
 }
 
 static inline void
-nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value)
+nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value,
+              unsigned writemask)
 {
    const unsigned num_components = glsl_get_vector_elements(var->type);
 
    nir_intrinsic_instr *store =
       nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
    store->num_components = num_components;
+   store->const_index[0] = writemask;
    store->variables[0] = nir_deref_var_create(store, var);
    store->src[0] = nir_src_for_ssa(value);
    nir_builder_instr_insert(build, &store->instr);
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index b2565c5..013f2ac 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -43,7 +43,7 @@
 
 
 INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0)
+INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0)
 INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
 
 /*
@@ -266,16 +266,16 @@ LOAD(per_vertex_output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
  * block index and an extra index with the writemask to use.
  */
 
-#define STORE(name, extra_srcs, extra_srcs_size, extra_indices, flags) \
+#define STORE(name, extra_srcs, extra_srcs_size, flags) \
    INTRINSIC(store_##name, 1 + extra_srcs, \
              ARR(0, extra_srcs_size, extra_srcs_size, extra_srcs_size), \
-             false, 0, 0, 1 + extra_indices, flags) \
+             false, 0, 0, 2, flags) \
    INTRINSIC(store_##name##_indirect, 2 + extra_srcs, \
              ARR(0, 1, extra_srcs_size, extra_srcs_size), \
-             false, 0, 0, 1 + extra_indices, flags)
+             false, 0, 0, 2, flags)
 
-STORE(output, 0, 0, 0, 0)
-STORE(per_vertex_output, 1, 1, 0, 0)
-STORE(ssbo, 1, 1, 1, 0)
+STORE(output, 0, 0, 0)
+STORE(per_vertex_output, 1, 1, 0)
+STORE(ssbo, 1, 1, 0)
 
 LAST_INTRINSIC(store_ssbo_indirect)
diff --git a/src/glsl/nir/nir_lower_gs_intrinsics.c b/src/glsl/nir/nir_lower_gs_intrinsics.c
index e0d0678..7cde839 100644
--- a/src/glsl/nir/nir_lower_gs_intrinsics.c
+++ b/src/glsl/nir/nir_lower_gs_intrinsics.c
@@ -99,7 +99,8 @@ rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state)
 
    /* Increment the vertex count by 1 */
    nir_store_var(b, state->vertex_count_var,
-                 nir_iadd(b, count, nir_imm_int(b, 1)));
+                 nir_iadd(b, count, nir_imm_int(b, 1)),
+                 1); /* .x */
 
    nir_instr_remove(&intrin->instr);
 
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index 5683e69..7bbc333 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -279,6 +279,8 @@ nir_lower_io_block(nir_block *block, void *void_state)
                                                                  store_op);
          store->num_components = intrin->num_components;
          store->const_index[0] = offset;
+         /* Copy the writemask */
+         store->const_index[1] = intrin->const_index[0];
 
          nir_src_copy(&store->src[0], &intrin->src[0], store);
 
diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c b/src/glsl/nir/nir_lower_locals_to_regs.c
index 17b53ca..3e21ac0 100644
--- a/src/glsl/nir/nir_lower_locals_to_regs.c
+++ b/src/glsl/nir/nir_lower_locals_to_regs.c
@@ -243,7 +243,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
 
          nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
          nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);
-         mov->dest.write_mask = (1 << intrin->num_components) - 1;
+         mov->dest.write_mask = intrin->const_index[0];
          mov->dest.dest.is_ssa = false;
          mov->dest.dest.reg.reg = reg_src.reg.reg;
          mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;
diff --git a/src/glsl/nir/nir_lower_var_copies.c b/src/glsl/nir/nir_lower_var_copies.c
index 98c107a..a9017de 100644
--- a/src/glsl/nir/nir_lower_var_copies.c
+++ b/src/glsl/nir/nir_lower_var_copies.c
@@ -128,6 +128,7 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
       nir_intrinsic_instr *store =
          nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
       store->num_components = num_components;
+      store->const_index[0] = (1 << num_components) - 1;
       store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));
 
       store->src[0].is_ssa = true;
diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c b/src/glsl/nir/nir_lower_vars_to_ssa.c
index e670dbd..3ec0e1d 100644
--- a/src/glsl/nir/nir_lower_vars_to_ssa.c
+++ b/src/glsl/nir/nir_lower_vars_to_ssa.c
@@ -26,6 +26,7 @@
  */
 
 #include "nir.h"
+#include "nir_builder.h"
 #include "nir_vla.h"
 
 
@@ -590,6 +591,9 @@ add_phi_sources(nir_block *block, nir_block *pred,
 static bool
 rename_variables_block(nir_block *block, struct lower_variables_state *state)
 {
+   nir_builder b;
+   nir_builder_init(&b, state->impl);
+
    nir_foreach_instr_safe(block, instr) {
       if (instr->type == nir_instr_type_phi) {
          nir_phi_instr *phi = nir_instr_as_phi(instr);
@@ -675,20 +679,40 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
 
             assert(intrin->src[0].is_ssa);
 
-            nir_alu_instr *mov = nir_alu_instr_create(state->shader,
-                                                      nir_op_imov);
-            mov->src[0].src.is_ssa = true;
-            mov->src[0].src.ssa = intrin->src[0].ssa;
-            for (unsigned i = intrin->num_components; i < 4; i++)
-               mov->src[0].swizzle[i] = 0;
+            nir_ssa_def *new_def;
+            b.cursor = nir_before_instr(&intrin->instr);
 
-            mov->dest.write_mask = (1 << intrin->num_components) - 1;
-            nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
-                              intrin->num_components, NULL);
+            if (intrin->const_index[0] == (1 << intrin->num_components) - 1) {
+               /* Whole variable store - just copy the source.  Note that
+                * intrin->num_components and intrin->src[0].ssa->num_components
+                * may differ.
+                */
+               unsigned swiz[4];
+               for (unsigned i = 0; i < 4; i++)
+                  swiz[i] = i < intrin->num_components ? i : 0;
+
+               new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
+                                     intrin->num_components, false);
+            } else {
+               nir_ssa_def *old_def = get_ssa_def_for_block(node, block, state);
+               /* For writemasked store_var intrinsics, we combine the newly
+                * written values with the existing contents of unwritten
+                * channels, creating a new SSA value for the whole vector.
+                */
+               nir_ssa_def *srcs[4];
+               for (unsigned i = 0; i < intrin->num_components; i++) {
+                  if (intrin->const_index[0] & (1 << i)) {
+                     srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
+                  } else {
+                     srcs[i] = nir_channel(&b, old_def, i);
+                  }
+               }
+               new_def = nir_vec(&b, srcs, intrin->num_components);
+            }
 
-            nir_instr_insert_before(&intrin->instr, &mov->instr);
+            assert(new_def->num_components == intrin->num_components);
 
-            def_stack_push(node, &mov->dest.dest.ssa, state);
+            def_stack_push(node, new_def, state);
 
             /* We'll wait to remove the instruction until the next pass
              * where we pop the node we just pushed back off the stack.
diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
index 06879d6..da92055 100644
--- a/src/glsl/nir/nir_validate.c
+++ b/src/glsl/nir/nir_validate.c
@@ -417,6 +417,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
       assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
              instr->variables[0]->var->data.mode != nir_var_uniform &&
              instr->variables[0]->var->data.mode != nir_var_shader_storage);
+      assert((instr->const_index[0] & ~((1 << instr->num_components) - 1)) == 0);
       break;
    }
    case nir_intrinsic_copy_var:
diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c
index 539e3c0..48f47b8 100644
--- a/src/mesa/program/prog_to_nir.c
+++ b/src/mesa/program/prog_to_nir.c
@@ -927,6 +927,7 @@ ptn_add_output_stores(struct ptn_compile *c)
       nir_intrinsic_instr *store =
          nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
       store->num_components = glsl_get_vector_elements(var->type);
+      store->const_index[0] = (1 << store->num_components) - 1;
       store->variables[0] =
          nir_deref_var_create(store, c->output_vars[var->data.location]);
 
@@ -997,6 +998,7 @@ setup_registers_and_variables(struct ptn_compile *c)
             nir_intrinsic_instr *store =
                nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
             store->num_components = 4;
+            store->const_index[0] = WRITEMASK_XYZW;
             store->variables[0] = nir_deref_var_create(store, fullvar);
             store->src[0] = nir_src_for_ssa(f001);
             nir_builder_instr_insert(b, &store->instr);
-- 
2.6.2



More information about the mesa-dev mailing list