[Mesa-dev] [PATCH 2/2] nir: Get rid of the array elements parameter on load/store intrinsics

Jason Ekstrand jason at jlekstrand.net
Tue May 19 17:28:18 PDT 2015


Previously, we used intrinsic->const_index[1] to represent "the number of
array elements to load" for load/store intrinsics.  However, this set to 1
by every pass that ever creates a load/store intrinsic.  Also, while it
might make some sense for registers, it makes no sense whatsoever in SSA.
On top of that, the i965 backend was the only backend to ever support it;
freedreno and vc4 just assert that it's always 1.  Let's just delete it.
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c            |  2 -
 .../drivers/freedreno/ir3/ir3_compiler_nir.c       |  5 --
 src/gallium/drivers/vc4/vc4_program.c              |  6 ---
 src/glsl/nir/nir_intrinsics.h                      | 17 ++++---
 src/glsl/nir/nir_lower_io.c                        |  2 -
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp           | 56 ++++++++++------------
 6 files changed, 33 insertions(+), 55 deletions(-)

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 59aaf67..f0d577a 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -401,7 +401,6 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
 
       load->num_components = 4;
       load->const_index[0] = index;
-      load->const_index[1] = 1;
       if (dim) {
          if (dimind) {
             load->src[srcn] =
@@ -1672,7 +1671,6 @@ ttn_add_output_stores(struct ttn_compile *c)
             nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
          store->num_components = 4;
          store->const_index[0] = var->data.driver_location + i;
-         store->const_index[1] = 1;
          store->src[0].reg.reg = c->output_regs[var->data.driver_location].reg;
          nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
       }
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 05e7049..2cf25ea 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1158,14 +1158,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 
 	switch (intr->intrinsic) {
 	case nir_intrinsic_load_uniform:
-		compile_assert(ctx, intr->const_index[1] == 1);
 		for (int i = 0; i < intr->num_components; i++) {
 			unsigned n = idx * 4 + i;
 			dst[i] = create_uniform(ctx, n);
 		}
 		break;
 	case nir_intrinsic_load_uniform_indirect:
-		compile_assert(ctx, intr->const_index[1] == 1);
 		src = get_src(ctx, &intr->src[0]);
 		for (int i = 0; i < intr->num_components; i++) {
 			unsigned n = idx * 4 + i;
@@ -1178,14 +1176,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 		emit_intrinsic_load_ubo(ctx, intr, dst);
 		break;
 	case nir_intrinsic_load_input:
-		compile_assert(ctx, intr->const_index[1] == 1);
 		for (int i = 0; i < intr->num_components; i++) {
 			unsigned n = idx * 4 + i;
 			dst[i] = b->inputs[n];
 		}
 		break;
 	case nir_intrinsic_load_input_indirect:
-		compile_assert(ctx, intr->const_index[1] == 1);
 		src = get_src(ctx, &intr->src[0]);
 		struct ir3_instruction *collect =
 				create_collect(b, b->inputs, b->ninputs);
@@ -1202,7 +1198,6 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 		emit_intrinisic_store_var(ctx, intr);
 		break;
 	case nir_intrinsic_store_output:
-		compile_assert(ctx, intr->const_index[1] == 1);
 		src = get_src(ctx, &intr->src[0]);
 		for (int i = 0; i < intr->num_components; i++) {
 			unsigned n = idx * 4 + i;
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index bf156f9..d84e5f2 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1849,8 +1849,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
 
         switch (instr->intrinsic) {
         case nir_intrinsic_load_uniform:
-                assert(instr->const_index[1] == 1);
-
                 for (int i = 0; i < instr->num_components; i++) {
                         dest[i] = qir_uniform(c, QUNIFORM_UNIFORM,
                                               instr->const_index[0] * 4 + i);
@@ -1858,8 +1856,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
                 break;
 
         case nir_intrinsic_load_uniform_indirect:
-                assert(instr->const_index[1] == 1);
-
                 for (int i = 0; i < instr->num_components; i++) {
                         dest[i] = indirect_uniform_load(c,
                                                         ntq_get_src(c, instr->src[0], 0),
@@ -1870,8 +1866,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
                 break;
 
         case nir_intrinsic_load_input:
-                assert(instr->const_index[1] == 1);
-
                 for (int i = 0; i < instr->num_components; i++)
                         dest[i] = c->inputs[instr->const_index[0] * 4 + i];
 
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 10192c5..27c04bc 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -138,12 +138,11 @@ SYSTEM_VALUE(sample_mask_in, 1)
 SYSTEM_VALUE(invocation_id, 1)
 
 /*
- * The first index is the address to load from, and the second index is the
- * number of array elements to load.  Indirect loads have an additional
- * register input, which is added to the constant address to compute the
- * final address to load from.  For UBO's (and SSBO's), the first source is
- * the (possibly constant) UBO buffer index and the indirect (if it exists)
- * is the second source.
+ * The first and only index is the base address to load from.  Indirect
+ * loads have an additional register input, which is added to the constant
+ * address to compute the final address to load from.  For UBO's (and
+ * SSBO's), the first source is the (possibly constant) UBO buffer index
+ * and the indirect (if it exists) is the second source.
  *
  * For vector backends, the address is in terms of one vec4, and so each array
  * element is +4 scalar components from the previous array element. For scalar
@@ -152,9 +151,9 @@ SYSTEM_VALUE(invocation_id, 1)
  */
 
 #define LOAD(name, extra_srcs, flags) \
-   INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 2, flags) \
+   INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 1, flags) \
    INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \
-             true, 0, 0, 2, flags)
+             true, 0, 0, 1, flags)
 
 LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
@@ -172,7 +171,7 @@ LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
    INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
              num_indices, flags) \
 
-STORE(output, 2, 0)
+STORE(output, 1, 0)
 /* STORE(ssbo, 3, 0) */
 
 LAST_INTRINSIC(store_output_indirect)
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index 03eed04..6761d5b 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -288,7 +288,6 @@ nir_lower_io_block(nir_block *block, void *void_state)
          offset += intrin->variables[0]->var->data.driver_location;
 
          load->const_index[0] = offset;
-         load->const_index[1] = 1;
 
          if (has_indirect)
             load->src[0] = indirect;
@@ -331,7 +330,6 @@ nir_lower_io_block(nir_block *block, void *void_state)
          offset += intrin->variables[0]->var->data.driver_location;
 
          store->const_index[0] = offset;
-         store->const_index[1] = 1;
 
          nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx);
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 5dd8363..56a2278 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1345,16 +1345,14 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          index -= num_direct_uniforms;
       }
 
-      for (int i = 0; i < instr->const_index[1]; i++) {
-         for (unsigned j = 0; j < instr->num_components; j++) {
-            fs_reg src = offset(retype(uniform_reg, dest.type), index);
-            if (has_indirect)
-               src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
-            index++;
-
-            emit(MOV(dest, src));
-            dest = offset(dest, 1);
-         }
+      for (unsigned j = 0; j < instr->num_components; j++) {
+         fs_reg src = offset(retype(uniform_reg, dest.type), index);
+         if (has_indirect)
+            src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
+         index++;
+
+         emit(MOV(dest, src));
+         dest = offset(dest, 1);
       }
       break;
    }
@@ -1426,17 +1424,15 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       /* fallthrough */
    case nir_intrinsic_load_input: {
       unsigned index = 0;
-      for (int i = 0; i < instr->const_index[1]; i++) {
-         for (unsigned j = 0; j < instr->num_components; j++) {
-            fs_reg src = offset(retype(nir_inputs, dest.type),
-                                instr->const_index[0] + index);
-            if (has_indirect)
-               src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
-            index++;
-
-            emit(MOV(dest, src));
-            dest = offset(dest, 1);
-         }
+      for (unsigned j = 0; j < instr->num_components; j++) {
+         fs_reg src = offset(retype(nir_inputs, dest.type),
+                             instr->const_index[0] + index);
+         if (has_indirect)
+            src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
+         index++;
+
+         emit(MOV(dest, src));
+         dest = offset(dest, 1);
       }
       break;
    }
@@ -1564,16 +1560,14 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    case nir_intrinsic_store_output: {
       fs_reg src = get_nir_src(instr->src[0]);
       unsigned index = 0;
-      for (int i = 0; i < instr->const_index[1]; i++) {
-         for (unsigned j = 0; j < instr->num_components; j++) {
-            fs_reg new_dest = offset(retype(nir_outputs, src.type),
-                                     instr->const_index[0] + index);
-            if (has_indirect)
-               src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
-            index++;
-            emit(MOV(new_dest, src));
-            src = offset(src, 1);
-         }
+      for (unsigned j = 0; j < instr->num_components; j++) {
+         fs_reg new_dest = offset(retype(nir_outputs, src.type),
+                                  instr->const_index[0] + index);
+         if (has_indirect)
+            src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
+         index++;
+         emit(MOV(new_dest, src));
+         src = offset(src, 1);
       }
       break;
    }
-- 
2.4.1



More information about the mesa-dev mailing list