[Mesa-dev] [PATCH 2/2] nir: Get rid of the array elements parameter on load/store intrinsics
Connor Abbott
cwabbott0 at gmail.com
Tue May 19 18:03:46 PDT 2015
On Tue, May 19, 2015 at 8:28 PM, Jason Ekstrand <jason at jlekstrand.net> wrote:
> Previously, we used intrinsic->const_index[1] to represent "the number of
> array elements to load" for load/store intrinsics. However, this set to 1
> by every pass that ever creates a load/store intrinsic. Also, while it
> might make some sense for registers, it makes no sense whatsoever in SSA.
> On top of that, the i965 backend was the only backend to ever support it;
> freedreno and vc4 just assert that it's always 1. Let's just delete it.
> ---
> src/gallium/auxiliary/nir/tgsi_to_nir.c | 2 -
> .../drivers/freedreno/ir3/ir3_compiler_nir.c | 5 --
> src/gallium/drivers/vc4/vc4_program.c | 6 ---
> src/glsl/nir/nir_intrinsics.h | 17 ++++---
> src/glsl/nir/nir_lower_io.c | 2 -
> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 56 ++++++++++------------
> 6 files changed, 33 insertions(+), 55 deletions(-)
>
> diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
> index 59aaf67..f0d577a 100644
> --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
> +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
> @@ -401,7 +401,6 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
>
> load->num_components = 4;
> load->const_index[0] = index;
> - load->const_index[1] = 1;
> if (dim) {
> if (dimind) {
> load->src[srcn] =
> @@ -1672,7 +1671,6 @@ ttn_add_output_stores(struct ttn_compile *c)
> nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
> store->num_components = 4;
> store->const_index[0] = var->data.driver_location + i;
> - store->const_index[1] = 1;
> store->src[0].reg.reg = c->output_regs[var->data.driver_location].reg;
> nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
> }
> diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
> index 05e7049..2cf25ea 100644
> --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
> +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
> @@ -1158,14 +1158,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
>
> switch (intr->intrinsic) {
> case nir_intrinsic_load_uniform:
> - compile_assert(ctx, intr->const_index[1] == 1);
> for (int i = 0; i < intr->num_components; i++) {
> unsigned n = idx * 4 + i;
> dst[i] = create_uniform(ctx, n);
> }
> break;
> case nir_intrinsic_load_uniform_indirect:
> - compile_assert(ctx, intr->const_index[1] == 1);
> src = get_src(ctx, &intr->src[0]);
> for (int i = 0; i < intr->num_components; i++) {
> unsigned n = idx * 4 + i;
> @@ -1178,14 +1176,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
> emit_intrinsic_load_ubo(ctx, intr, dst);
> break;
> case nir_intrinsic_load_input:
> - compile_assert(ctx, intr->const_index[1] == 1);
> for (int i = 0; i < intr->num_components; i++) {
> unsigned n = idx * 4 + i;
> dst[i] = b->inputs[n];
> }
> break;
> case nir_intrinsic_load_input_indirect:
> - compile_assert(ctx, intr->const_index[1] == 1);
> src = get_src(ctx, &intr->src[0]);
> struct ir3_instruction *collect =
> create_collect(b, b->inputs, b->ninputs);
> @@ -1202,7 +1198,6 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
> emit_intrinisic_store_var(ctx, intr);
> break;
> case nir_intrinsic_store_output:
> - compile_assert(ctx, intr->const_index[1] == 1);
> src = get_src(ctx, &intr->src[0]);
> for (int i = 0; i < intr->num_components; i++) {
> unsigned n = idx * 4 + i;
> diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
> index bf156f9..d84e5f2 100644
> --- a/src/gallium/drivers/vc4/vc4_program.c
> +++ b/src/gallium/drivers/vc4/vc4_program.c
> @@ -1849,8 +1849,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
>
> switch (instr->intrinsic) {
> case nir_intrinsic_load_uniform:
> - assert(instr->const_index[1] == 1);
> -
> for (int i = 0; i < instr->num_components; i++) {
> dest[i] = qir_uniform(c, QUNIFORM_UNIFORM,
> instr->const_index[0] * 4 + i);
> @@ -1858,8 +1856,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
> break;
>
> case nir_intrinsic_load_uniform_indirect:
> - assert(instr->const_index[1] == 1);
> -
> for (int i = 0; i < instr->num_components; i++) {
> dest[i] = indirect_uniform_load(c,
> ntq_get_src(c, instr->src[0], 0),
> @@ -1870,8 +1866,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
> break;
>
> case nir_intrinsic_load_input:
> - assert(instr->const_index[1] == 1);
> -
> for (int i = 0; i < instr->num_components; i++)
> dest[i] = c->inputs[instr->const_index[0] * 4 + i];
>
> diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
> index 10192c5..27c04bc 100644
> --- a/src/glsl/nir/nir_intrinsics.h
> +++ b/src/glsl/nir/nir_intrinsics.h
> @@ -138,12 +138,11 @@ SYSTEM_VALUE(sample_mask_in, 1)
> SYSTEM_VALUE(invocation_id, 1)
>
> /*
> - * The first index is the address to load from, and the second index is the
> - * number of array elements to load. Indirect loads have an additional
> - * register input, which is added to the constant address to compute the
> - * final address to load from. For UBO's (and SSBO's), the first source is
> - * the (possibly constant) UBO buffer index and the indirect (if it exists)
> - * is the second source.
> + * The first and only index is the base address to load from. Indirect
> + * loads have an additional register input, which is added to the constant
> + * address to compute the final address to load from. For UBO's (and
> + * SSBO's), the first source is the (possibly constant) UBO buffer index
> + * and the indirect (if it exists) is the second source.
> *
> * For vector backends, the address is in terms of one vec4, and so each array
> * element is +4 scalar components from the previous array element. For scalar
> @@ -152,9 +151,9 @@ SYSTEM_VALUE(invocation_id, 1)
> */
>
> #define LOAD(name, extra_srcs, flags) \
> - INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 2, flags) \
> + INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 1, flags) \
> INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \
> - true, 0, 0, 2, flags)
> + true, 0, 0, 1, flags)
>
> LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
> LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
> @@ -172,7 +171,7 @@ LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
> INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
> num_indices, flags) \
>
> -STORE(output, 2, 0)
> +STORE(output, 1, 0)
> /* STORE(ssbo, 3, 0) */
We should probably fix the commented-out SSBO code so the Igalia folks
aren't *too* confused when they see this shortly :). Otherwise,
Reviewed-by: Connor Abbott <cwabbott0 at gmail.com>
>
> LAST_INTRINSIC(store_output_indirect)
> diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
> index 03eed04..6761d5b 100644
> --- a/src/glsl/nir/nir_lower_io.c
> +++ b/src/glsl/nir/nir_lower_io.c
> @@ -288,7 +288,6 @@ nir_lower_io_block(nir_block *block, void *void_state)
> offset += intrin->variables[0]->var->data.driver_location;
>
> load->const_index[0] = offset;
> - load->const_index[1] = 1;
>
> if (has_indirect)
> load->src[0] = indirect;
> @@ -331,7 +330,6 @@ nir_lower_io_block(nir_block *block, void *void_state)
> offset += intrin->variables[0]->var->data.driver_location;
>
> store->const_index[0] = offset;
> - store->const_index[1] = 1;
>
> nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx);
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 5dd8363..56a2278 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -1345,16 +1345,14 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
> index -= num_direct_uniforms;
> }
>
> - for (int i = 0; i < instr->const_index[1]; i++) {
> - for (unsigned j = 0; j < instr->num_components; j++) {
> - fs_reg src = offset(retype(uniform_reg, dest.type), index);
> - if (has_indirect)
> - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
> - index++;
> -
> - emit(MOV(dest, src));
> - dest = offset(dest, 1);
> - }
> + for (unsigned j = 0; j < instr->num_components; j++) {
> + fs_reg src = offset(retype(uniform_reg, dest.type), index);
> + if (has_indirect)
> + src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
> + index++;
> +
> + emit(MOV(dest, src));
> + dest = offset(dest, 1);
> }
> break;
> }
> @@ -1426,17 +1424,15 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
> /* fallthrough */
> case nir_intrinsic_load_input: {
> unsigned index = 0;
> - for (int i = 0; i < instr->const_index[1]; i++) {
> - for (unsigned j = 0; j < instr->num_components; j++) {
> - fs_reg src = offset(retype(nir_inputs, dest.type),
> - instr->const_index[0] + index);
> - if (has_indirect)
> - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
> - index++;
> -
> - emit(MOV(dest, src));
> - dest = offset(dest, 1);
> - }
> + for (unsigned j = 0; j < instr->num_components; j++) {
> + fs_reg src = offset(retype(nir_inputs, dest.type),
> + instr->const_index[0] + index);
> + if (has_indirect)
> + src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
> + index++;
> +
> + emit(MOV(dest, src));
> + dest = offset(dest, 1);
> }
> break;
> }
> @@ -1564,16 +1560,14 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
> case nir_intrinsic_store_output: {
> fs_reg src = get_nir_src(instr->src[0]);
> unsigned index = 0;
> - for (int i = 0; i < instr->const_index[1]; i++) {
> - for (unsigned j = 0; j < instr->num_components; j++) {
> - fs_reg new_dest = offset(retype(nir_outputs, src.type),
> - instr->const_index[0] + index);
> - if (has_indirect)
> - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
> - index++;
> - emit(MOV(new_dest, src));
> - src = offset(src, 1);
> - }
> + for (unsigned j = 0; j < instr->num_components; j++) {
> + fs_reg new_dest = offset(retype(nir_outputs, src.type),
> + instr->const_index[0] + index);
> + if (has_indirect)
> + src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
> + index++;
> + emit(MOV(new_dest, src));
> + src = offset(src, 1);
> }
> break;
> }
> --
> 2.4.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list