[Mesa-dev] [PATCH v2 03/13] nir: Add a writemask to store intrinsics.
Jason Ekstrand
jason at jlekstrand.net
Sat Dec 12 08:29:37 PST 2015
On Dec 11, 2015 1:24 PM, "Kenneth Graunke" <kenneth at whitecape.org> wrote:
>
> Tessellation control shaders need to be careful when writing outputs.
> Because multiple threads can concurrently write the same output
> variables, we need to only write the exact components we were told.
>
> Traditionally, for sub-vector writes, we've read the whole vector,
> updated the temporary, and written the whole vector back. This breaks
> down with concurrent access.
>
> This patch prepares the way for a solution by adding a writemask field
> to store_var intrinsics, as well as the other store intrinsics. It then
> updates all produces to emit a writemask of "all channels enabled". It
> updates nir_lower_io to copy the writemask to output store intrinsics.
>
> Finally, it updates nir_lower_vars_to_ssa to handle partial writemasks
> by doing a read-modify-write cycle (which is safe, because local
> variables are specific to a single thread).
>
> This should have no functional change, since no one actually emits
> partial writemasks yet.
>
> v2: Make nir_validate momentarily assert that writemasks cover the
> complete value - we shouldn't have partial writemasks yet
> (requested by Jason Ekstrand).
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
> src/gallium/auxiliary/nir/tgsi_to_nir.c | 1 +
> .../drivers/freedreno/ir3/ir3_compiler_nir.c | 6 +++
> src/glsl/nir/glsl_to_nir.cpp | 2 +
> src/glsl/nir/nir_builder.h | 4 +-
> src/glsl/nir/nir_intrinsics.h | 12 +++---
> src/glsl/nir/nir_lower_gs_intrinsics.c | 3 +-
> src/glsl/nir/nir_lower_io.c | 3 ++
> src/glsl/nir/nir_lower_locals_to_regs.c | 2 +-
> src/glsl/nir/nir_lower_var_copies.c | 1 +
> src/glsl/nir/nir_lower_vars_to_ssa.c | 46
++++++++++++++++------
> src/glsl/nir/nir_validate.c | 2 +
> src/mesa/program/prog_to_nir.c | 2 +
> 12 files changed, 64 insertions(+), 20 deletions(-)
>
> diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c
b/src/gallium/auxiliary/nir/tgsi_to_nir.c
> index 5def6d3..c9aa049 100644
> --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
> +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
> @@ -1901,6 +1901,7 @@ ttn_emit_instruction(struct ttn_compile *c)
> &tgsi_dst->Indirect : NULL;
>
> store->num_components = 4;
> + store->const_index[0] = 0xf;
> store->variables[0] = ttn_array_deref(c, store, var, offset,
indirect);
> store->src[0] = nir_src_for_reg(dest.dest.reg.reg);
>
> diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
> index eea5c5e..6154a10 100644
> --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
> +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
> @@ -1321,6 +1321,9 @@ emit_intrinisic_store_var(struct ir3_compile *ctx,
nir_intrinsic_instr *intr)
> case nir_deref_array_type_direct:
> /* direct access does not require anything special: */
> for (int i = 0; i < intr->num_components; i++) {
> + if (!(intr->const_index[0] & (1 << i)))
> + continue;
> +
> unsigned n = darr->base_offset * 4 + i;
> compile_assert(ctx, n < arr->length);
> arr->arr[n] = src[i];
> @@ -1333,6 +1336,9 @@ emit_intrinisic_store_var(struct ir3_compile *ctx,
nir_intrinsic_instr *intr)
> struct ir3_instruction *addr =
> get_addr(ctx, get_src(ctx,
&darr->indirect)[0]);
> for (int i = 0; i < intr->num_components; i++) {
> + if (!(intr->const_index[0] & (1 << i)))
> + continue;
> +
> struct ir3_instruction *store;
> unsigned n = darr->base_offset * 4 + i;
> compile_assert(ctx, n < arr->length);
> diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
> index db8b0ca..14f3153 100644
> --- a/src/glsl/nir/glsl_to_nir.cpp
> +++ b/src/glsl/nir/glsl_to_nir.cpp
> @@ -1068,6 +1068,7 @@ nir_visitor::visit(ir_call *ir)
> nir_intrinsic_instr *store_instr =
> nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
> store_instr->num_components =
ir->return_deref->type->vector_elements;
> + store_instr->const_index[0] = (1 <<
store_instr->num_components) - 1;
>
> store_instr->variables[0] =
> evaluate_deref(&store_instr->instr, ir->return_deref);
> @@ -1166,6 +1167,7 @@ nir_visitor::visit(ir_assignment *ir)
> nir_intrinsic_instr *store =
> nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
> store->num_components = ir->lhs->type->vector_elements;
> + store->const_index[0] = (1 << store->num_components) - 1;
> nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
> store->variables[0] = nir_deref_as_var(store_deref);
> store->src[0] = nir_src_for_ssa(src);
> diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
> index b909f483..6478aad 100644
> --- a/src/glsl/nir/nir_builder.h
> +++ b/src/glsl/nir/nir_builder.h
> @@ -310,13 +310,15 @@ nir_load_var(nir_builder *build, nir_variable *var)
> }
>
> static inline void
> -nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value)
> +nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value,
> + unsigned writemask)
> {
> const unsigned num_components = glsl_get_vector_elements(var->type);
>
> nir_intrinsic_instr *store =
> nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
> store->num_components = num_components;
> + store->const_index[0] = writemask;
> store->variables[0] = nir_deref_var_create(store, var);
> store->src[0] = nir_src_for_ssa(value);
> nir_builder_instr_insert(build, &store->instr);
> diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
> index 9811fb3..b0cbd2f 100644
> --- a/src/glsl/nir/nir_intrinsics.h
> +++ b/src/glsl/nir/nir_intrinsics.h
> @@ -43,7 +43,7 @@
>
>
> INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
> -INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0)
> +INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0)
> INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
>
> /*
> @@ -302,12 +302,12 @@ LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
> #define STORE(name, srcs, indices, flags) \
> INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices,
flags)
>
> -/* src[] = { value, offset }. const_index[] = { base } */
> -STORE(output, 2, 1, 0)
> -/* src[] = { value, vertex, offset }. const_index[] = { base } */
> -STORE(per_vertex_output, 3, 1, 0)
> +/* src[] = { value, offset }. const_index[] = { base, write_mask } */
> +STORE(output, 2, 2, 0)
> +/* src[] = { value, vertex, offset }. const_index[] = { base, write_mask
} */
> +STORE(per_vertex_output, 3, 2, 0)
> /* src[] = { value, block_index, offset }. const_index[] = { write_mask
} */
> -STORE(ssbo, 3, 1, 0)
> +STORE(ssbo, 3, 2, 0)
I don't think you meant to change SSBOs.
> /* src[] = { value, offset }. const_index[] = { base, write_mask } */
> STORE(shared, 2, 1, 0)
>
> diff --git a/src/glsl/nir/nir_lower_gs_intrinsics.c
b/src/glsl/nir/nir_lower_gs_intrinsics.c
> index e0d0678..1325459 100644
> --- a/src/glsl/nir/nir_lower_gs_intrinsics.c
> +++ b/src/glsl/nir/nir_lower_gs_intrinsics.c
> @@ -99,7 +99,8 @@ rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct
state *state)
>
> /* Increment the vertex count by 1 */
> nir_store_var(b, state->vertex_count_var,
> - nir_iadd(b, count, nir_imm_int(b, 1)));
> + nir_iadd(b, count, nir_imm_int(b, 1)),
> + 0x1); /* .x */
>
> nir_instr_remove(&intrin->instr);
>
> diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
> index 3d646eb..a3565cc 100644
> --- a/src/glsl/nir/nir_lower_io.c
> +++ b/src/glsl/nir/nir_lower_io.c
> @@ -261,6 +261,9 @@ nir_lower_io_block(nir_block *block, void *void_state)
> store->const_index[0] =
> intrin->variables[0]->var->data.driver_location;
>
> + /* Copy the writemask */
> + store->const_index[1] = intrin->const_index[0];
> +
> if (per_vertex)
> store->src[1] = nir_src_for_ssa(vertex_index);
>
> diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c
b/src/glsl/nir/nir_lower_locals_to_regs.c
> index 17b53ca..3e21ac0 100644
> --- a/src/glsl/nir/nir_lower_locals_to_regs.c
> +++ b/src/glsl/nir/nir_lower_locals_to_regs.c
> @@ -243,7 +243,7 @@ lower_locals_to_regs_block(nir_block *block, void
*void_state)
>
> nir_alu_instr *mov = nir_alu_instr_create(state->shader,
nir_op_imov);
> nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);
> - mov->dest.write_mask = (1 << intrin->num_components) - 1;
> + mov->dest.write_mask = intrin->const_index[0];
> mov->dest.dest.is_ssa = false;
> mov->dest.dest.reg.reg = reg_src.reg.reg;
> mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;
> diff --git a/src/glsl/nir/nir_lower_var_copies.c
b/src/glsl/nir/nir_lower_var_copies.c
> index 98c107a..a9017de 100644
> --- a/src/glsl/nir/nir_lower_var_copies.c
> +++ b/src/glsl/nir/nir_lower_var_copies.c
> @@ -128,6 +128,7 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
> nir_intrinsic_instr *store =
> nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
> store->num_components = num_components;
> + store->const_index[0] = (1 << num_components) - 1;
> store->variables[0] = nir_deref_as_var(nir_copy_deref(store,
&dest_head->deref));
>
> store->src[0].is_ssa = true;
> diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c
b/src/glsl/nir/nir_lower_vars_to_ssa.c
> index e670dbd..3ec0e1d 100644
> --- a/src/glsl/nir/nir_lower_vars_to_ssa.c
> +++ b/src/glsl/nir/nir_lower_vars_to_ssa.c
> @@ -26,6 +26,7 @@
> */
>
> #include "nir.h"
> +#include "nir_builder.h"
> #include "nir_vla.h"
>
>
> @@ -590,6 +591,9 @@ add_phi_sources(nir_block *block, nir_block *pred,
> static bool
> rename_variables_block(nir_block *block, struct lower_variables_state
*state)
> {
> + nir_builder b;
> + nir_builder_init(&b, state->impl);
> +
> nir_foreach_instr_safe(block, instr) {
> if (instr->type == nir_instr_type_phi) {
> nir_phi_instr *phi = nir_instr_as_phi(instr);
> @@ -675,20 +679,40 @@ rename_variables_block(nir_block *block, struct
lower_variables_state *state)
>
> assert(intrin->src[0].is_ssa);
>
> - nir_alu_instr *mov = nir_alu_instr_create(state->shader,
> - nir_op_imov);
> - mov->src[0].src.is_ssa = true;
> - mov->src[0].src.ssa = intrin->src[0].ssa;
> - for (unsigned i = intrin->num_components; i < 4; i++)
> - mov->src[0].swizzle[i] = 0;
> + nir_ssa_def *new_def;
> + b.cursor = nir_before_instr(&intrin->instr);
>
> - mov->dest.write_mask = (1 << intrin->num_components) - 1;
> - nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
> - intrin->num_components, NULL);
> + if (intrin->const_index[0] == (1 << intrin->num_components)
- 1) {
> + /* Whole variable store - just copy the source. Note that
> + * intrin->num_components and
intrin->src[0].ssa->num_components
> + * may differ.
> + */
> + unsigned swiz[4];
> + for (unsigned i = 0; i < 4; i++)
> + swiz[i] = i < intrin->num_components ? i : 0;
> +
> + new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
> + intrin->num_components, false);
> + } else {
> + nir_ssa_def *old_def = get_ssa_def_for_block(node, block,
state);
> + /* For writemasked store_var intrinsics, we combine the
newly
> + * written values with the existing contents of unwritten
> + * channels, creating a new SSA value for the whole
vector.
> + */
> + nir_ssa_def *srcs[4];
> + for (unsigned i = 0; i < intrin->num_components; i++) {
> + if (intrin->const_index[0] & (1 << i)) {
> + srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
> + } else {
> + srcs[i] = nir_channel(&b, old_def, i);
> + }
> + }
> + new_def = nir_vec(&b, srcs, intrin->num_components);
> + }
>
> - nir_instr_insert_before(&intrin->instr, &mov->instr);
> + assert(new_def->num_components == intrin->num_components);
>
> - def_stack_push(node, &mov->dest.dest.ssa, state);
> + def_stack_push(node, new_def, state);
>
> /* We'll wait to remove the instruction until the next pass
> * where we pop the node we just pushed back off the stack.
> diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
> index 06879d6..89cf0b8 100644
> --- a/src/glsl/nir/nir_validate.c
> +++ b/src/glsl/nir/nir_validate.c
> @@ -417,6 +417,8 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr,
validate_state *state)
> assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
> instr->variables[0]->var->data.mode != nir_var_uniform &&
> instr->variables[0]->var->data.mode !=
nir_var_shader_storage);
> + /* Currently, writemasks must cover the entire value */
> + assert(instr->const_index[0] == (1 << instr->num_components) - 1);
> break;
> }
> case nir_intrinsic_copy_var:
> diff --git a/src/mesa/program/prog_to_nir.c
b/src/mesa/program/prog_to_nir.c
> index 539e3c0..48f47b8 100644
> --- a/src/mesa/program/prog_to_nir.c
> +++ b/src/mesa/program/prog_to_nir.c
> @@ -927,6 +927,7 @@ ptn_add_output_stores(struct ptn_compile *c)
> nir_intrinsic_instr *store =
> nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
> store->num_components = glsl_get_vector_elements(var->type);
> + store->const_index[0] = (1 << store->num_components) - 1;
> store->variables[0] =
> nir_deref_var_create(store, c->output_vars[var->data.location]);
>
> @@ -997,6 +998,7 @@ setup_registers_and_variables(struct ptn_compile *c)
> nir_intrinsic_instr *store =
> nir_intrinsic_instr_create(shader,
nir_intrinsic_store_var);
> store->num_components = 4;
> + store->const_index[0] = WRITEMASK_XYZW;
> store->variables[0] = nir_deref_var_create(store, fullvar);
> store->src[0] = nir_src_for_ssa(f001);
> nir_builder_instr_insert(b, &store->instr);
> --
> 2.6.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20151212/3b8a1cfa/attachment-0001.html>
More information about the mesa-dev
mailing list