[Mesa-dev] [PATCH 2/2] nir: Get rid of the array elements parameter on load/store intrinsics

Connor Abbott cwabbott0 at gmail.com
Tue May 19 18:03:46 PDT 2015


On Tue, May 19, 2015 at 8:28 PM, Jason Ekstrand <jason at jlekstrand.net> wrote:
> Previously, we used intrinsic->const_index[1] to represent "the number of
> array elements to load" for load/store intrinsics.  However, this set to 1
> by every pass that ever creates a load/store intrinsic.  Also, while it
> might make some sense for registers, it makes no sense whatsoever in SSA.
> On top of that, the i965 backend was the only backend to ever support it;
> freedreno and vc4 just assert that it's always 1.  Let's just delete it.
> ---
>  src/gallium/auxiliary/nir/tgsi_to_nir.c            |  2 -
>  .../drivers/freedreno/ir3/ir3_compiler_nir.c       |  5 --
>  src/gallium/drivers/vc4/vc4_program.c              |  6 ---
>  src/glsl/nir/nir_intrinsics.h                      | 17 ++++---
>  src/glsl/nir/nir_lower_io.c                        |  2 -
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp           | 56 ++++++++++------------
>  6 files changed, 33 insertions(+), 55 deletions(-)
>
> diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
> index 59aaf67..f0d577a 100644
> --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
> +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
> @@ -401,7 +401,6 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
>
>        load->num_components = 4;
>        load->const_index[0] = index;
> -      load->const_index[1] = 1;
>        if (dim) {
>           if (dimind) {
>              load->src[srcn] =
> @@ -1672,7 +1671,6 @@ ttn_add_output_stores(struct ttn_compile *c)
>              nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
>           store->num_components = 4;
>           store->const_index[0] = var->data.driver_location + i;
> -         store->const_index[1] = 1;
>           store->src[0].reg.reg = c->output_regs[var->data.driver_location].reg;
>           nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
>        }
> diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
> index 05e7049..2cf25ea 100644
> --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
> +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
> @@ -1158,14 +1158,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
>
>         switch (intr->intrinsic) {
>         case nir_intrinsic_load_uniform:
> -               compile_assert(ctx, intr->const_index[1] == 1);
>                 for (int i = 0; i < intr->num_components; i++) {
>                         unsigned n = idx * 4 + i;
>                         dst[i] = create_uniform(ctx, n);
>                 }
>                 break;
>         case nir_intrinsic_load_uniform_indirect:
> -               compile_assert(ctx, intr->const_index[1] == 1);
>                 src = get_src(ctx, &intr->src[0]);
>                 for (int i = 0; i < intr->num_components; i++) {
>                         unsigned n = idx * 4 + i;
> @@ -1178,14 +1176,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
>                 emit_intrinsic_load_ubo(ctx, intr, dst);
>                 break;
>         case nir_intrinsic_load_input:
> -               compile_assert(ctx, intr->const_index[1] == 1);
>                 for (int i = 0; i < intr->num_components; i++) {
>                         unsigned n = idx * 4 + i;
>                         dst[i] = b->inputs[n];
>                 }
>                 break;
>         case nir_intrinsic_load_input_indirect:
> -               compile_assert(ctx, intr->const_index[1] == 1);
>                 src = get_src(ctx, &intr->src[0]);
>                 struct ir3_instruction *collect =
>                                 create_collect(b, b->inputs, b->ninputs);
> @@ -1202,7 +1198,6 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
>                 emit_intrinisic_store_var(ctx, intr);
>                 break;
>         case nir_intrinsic_store_output:
> -               compile_assert(ctx, intr->const_index[1] == 1);
>                 src = get_src(ctx, &intr->src[0]);
>                 for (int i = 0; i < intr->num_components; i++) {
>                         unsigned n = idx * 4 + i;
> diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
> index bf156f9..d84e5f2 100644
> --- a/src/gallium/drivers/vc4/vc4_program.c
> +++ b/src/gallium/drivers/vc4/vc4_program.c
> @@ -1849,8 +1849,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
>
>          switch (instr->intrinsic) {
>          case nir_intrinsic_load_uniform:
> -                assert(instr->const_index[1] == 1);
> -
>                  for (int i = 0; i < instr->num_components; i++) {
>                          dest[i] = qir_uniform(c, QUNIFORM_UNIFORM,
>                                                instr->const_index[0] * 4 + i);
> @@ -1858,8 +1856,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
>                  break;
>
>          case nir_intrinsic_load_uniform_indirect:
> -                assert(instr->const_index[1] == 1);
> -
>                  for (int i = 0; i < instr->num_components; i++) {
>                          dest[i] = indirect_uniform_load(c,
>                                                          ntq_get_src(c, instr->src[0], 0),
> @@ -1870,8 +1866,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
>                  break;
>
>          case nir_intrinsic_load_input:
> -                assert(instr->const_index[1] == 1);
> -
>                  for (int i = 0; i < instr->num_components; i++)
>                          dest[i] = c->inputs[instr->const_index[0] * 4 + i];
>
> diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
> index 10192c5..27c04bc 100644
> --- a/src/glsl/nir/nir_intrinsics.h
> +++ b/src/glsl/nir/nir_intrinsics.h
> @@ -138,12 +138,11 @@ SYSTEM_VALUE(sample_mask_in, 1)
>  SYSTEM_VALUE(invocation_id, 1)
>
>  /*
> - * The first index is the address to load from, and the second index is the
> - * number of array elements to load.  Indirect loads have an additional
> - * register input, which is added to the constant address to compute the
> - * final address to load from.  For UBO's (and SSBO's), the first source is
> - * the (possibly constant) UBO buffer index and the indirect (if it exists)
> - * is the second source.
> + * The first and only index is the base address to load from.  Indirect
> + * loads have an additional register input, which is added to the constant
> + * address to compute the final address to load from.  For UBO's (and
> + * SSBO's), the first source is the (possibly constant) UBO buffer index
> + * and the indirect (if it exists) is the second source.
>   *
>   * For vector backends, the address is in terms of one vec4, and so each array
>   * element is +4 scalar components from the previous array element. For scalar
> @@ -152,9 +151,9 @@ SYSTEM_VALUE(invocation_id, 1)
>   */
>
>  #define LOAD(name, extra_srcs, flags) \
> -   INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 2, flags) \
> +   INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 1, flags) \
>     INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \
> -             true, 0, 0, 2, flags)
> +             true, 0, 0, 1, flags)
>
>  LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
>  LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
> @@ -172,7 +171,7 @@ LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
>     INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
>               num_indices, flags) \
>
> -STORE(output, 2, 0)
> +STORE(output, 1, 0)
>  /* STORE(ssbo, 3, 0) */

We should probably fix the commented-out SSBO code so the Igalia folks
aren't *too* confused when they see this shortly :). Otherwise,

Reviewed-by: Connor Abbott <cwabbott0 at gmail.com>

>
>  LAST_INTRINSIC(store_output_indirect)
> diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
> index 03eed04..6761d5b 100644
> --- a/src/glsl/nir/nir_lower_io.c
> +++ b/src/glsl/nir/nir_lower_io.c
> @@ -288,7 +288,6 @@ nir_lower_io_block(nir_block *block, void *void_state)
>           offset += intrin->variables[0]->var->data.driver_location;
>
>           load->const_index[0] = offset;
> -         load->const_index[1] = 1;
>
>           if (has_indirect)
>              load->src[0] = indirect;
> @@ -331,7 +330,6 @@ nir_lower_io_block(nir_block *block, void *void_state)
>           offset += intrin->variables[0]->var->data.driver_location;
>
>           store->const_index[0] = offset;
> -         store->const_index[1] = 1;
>
>           nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx);
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 5dd8363..56a2278 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -1345,16 +1345,14 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>           index -= num_direct_uniforms;
>        }
>
> -      for (int i = 0; i < instr->const_index[1]; i++) {
> -         for (unsigned j = 0; j < instr->num_components; j++) {
> -            fs_reg src = offset(retype(uniform_reg, dest.type), index);
> -            if (has_indirect)
> -               src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
> -            index++;
> -
> -            emit(MOV(dest, src));
> -            dest = offset(dest, 1);
> -         }
> +      for (unsigned j = 0; j < instr->num_components; j++) {
> +         fs_reg src = offset(retype(uniform_reg, dest.type), index);
> +         if (has_indirect)
> +            src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
> +         index++;
> +
> +         emit(MOV(dest, src));
> +         dest = offset(dest, 1);
>        }
>        break;
>     }
> @@ -1426,17 +1424,15 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>        /* fallthrough */
>     case nir_intrinsic_load_input: {
>        unsigned index = 0;
> -      for (int i = 0; i < instr->const_index[1]; i++) {
> -         for (unsigned j = 0; j < instr->num_components; j++) {
> -            fs_reg src = offset(retype(nir_inputs, dest.type),
> -                                instr->const_index[0] + index);
> -            if (has_indirect)
> -               src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
> -            index++;
> -
> -            emit(MOV(dest, src));
> -            dest = offset(dest, 1);
> -         }
> +      for (unsigned j = 0; j < instr->num_components; j++) {
> +         fs_reg src = offset(retype(nir_inputs, dest.type),
> +                             instr->const_index[0] + index);
> +         if (has_indirect)
> +            src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
> +         index++;
> +
> +         emit(MOV(dest, src));
> +         dest = offset(dest, 1);
>        }
>        break;
>     }
> @@ -1564,16 +1560,14 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>     case nir_intrinsic_store_output: {
>        fs_reg src = get_nir_src(instr->src[0]);
>        unsigned index = 0;
> -      for (int i = 0; i < instr->const_index[1]; i++) {
> -         for (unsigned j = 0; j < instr->num_components; j++) {
> -            fs_reg new_dest = offset(retype(nir_outputs, src.type),
> -                                     instr->const_index[0] + index);
> -            if (has_indirect)
> -               src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
> -            index++;
> -            emit(MOV(new_dest, src));
> -            src = offset(src, 1);
> -         }
> +      for (unsigned j = 0; j < instr->num_components; j++) {
> +         fs_reg new_dest = offset(retype(nir_outputs, src.type),
> +                                  instr->const_index[0] + index);
> +         if (has_indirect)
> +            src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
> +         index++;
> +         emit(MOV(new_dest, src));
> +         src = offset(src, 1);
>        }
>        break;
>     }
> --
> 2.4.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list