Mesa (master): intel/nir: Use nir_extract_bits in lower_mem_access_bit_sizes

Mon Nov 11 17:17:16 UTC 2019

Module: Mesa
Branch: master
Commit: 83f04d80b0eda4a0eedebe183b3da09cf844c05c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=83f04d80b0eda4a0eedebe183b3da09cf844c05c

Author: Jason Ekstrand <jason at jlekstrand.net>
Date:   Wed Nov  6 12:36:28 2019 -0600

intel/nir: Use nir_extract_bits in lower_mem_access_bit_sizes

The new helper solves most of the annoying problems with data wrangling
in brw_nir_lower_mem_access_bit_sizes.

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira at intel.com>

---

 .../compiler/brw_nir_lower_mem_access_bit_sizes.c  | 52 +++++++---------------
 1 file changed, 15 insertions(+), 37 deletions(-)

diff --git a/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c b/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c
index 5eff6639e51..80aae4916bf 100644
--- a/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c
+++ b/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c
@@ -85,8 +85,7 @@ lower_mem_load_bit_size(nir_builder *b, nir_intrinsic_instr *intrin)
    const unsigned bytes_read = num_components * (bit_size / 8);
    const unsigned align = nir_intrinsic_align(intrin);
 
-   nir_ssa_def *result[NIR_MAX_VEC_COMPONENTS] = { NULL, };
-
+   nir_ssa_def *result;
    nir_src *offset_src = nir_get_io_offset_src(intrin);
    if (bit_size < 32 && nir_src_is_const(*offset_src)) {
       /* The offset is constant so we can use a 32-bit load and just shift it
@@ -102,21 +101,12 @@ lower_mem_load_bit_size(nir_builder *b, nir_intrinsic_instr *intrin)
 
       nir_ssa_def *load = dup_mem_intrinsic(b, intrin, NULL, -load_offset,
                                             load_comps32, 32, 4);
-      nir_ssa_def *unpacked[3];
-      for (unsigned i = 0; i < load_comps32; i++)
-         unpacked[i] = nir_unpack_bits(b, nir_channel(b, load, i), bit_size);
-
-      assert(load_offset % (bit_size / 8) == 0);
-      const unsigned divisor = 32 / bit_size;
-
-      for (unsigned i = 0; i < num_components; i++) {
-         unsigned load_i = i + load_offset / (bit_size / 8);
-         result[i] = nir_channel(b, unpacked[load_i / divisor],
-                                    load_i % divisor);
-      }
+      result = nir_extract_bits(b, &load, 1, load_offset * 8,
+                                num_components, bit_size);
    } else {
       /* Otherwise, we have to break it into smaller loads */
-      unsigned res_idx = 0;
+      nir_ssa_def *loads[8];
+      unsigned num_loads = 0;
       int load_offset = 0;
       while (load_offset < bytes_read) {
          const unsigned bytes_left = bytes_read - load_offset;
@@ -131,23 +121,19 @@ lower_mem_load_bit_size(nir_builder *b, nir_intrinsic_instr *intrin)
             load_comps = DIV_ROUND_UP(MIN2(bytes_left, 16), 4);
          }
 
-         nir_ssa_def *load = dup_mem_intrinsic(b, intrin, NULL, load_offset,
-                                               load_comps, load_bit_size,
-                                               align);
-
-         nir_ssa_def *unpacked = nir_bitcast_vector(b, load, bit_size);
-         for (unsigned i = 0; i < unpacked->num_components; i++) {
-            if (res_idx < num_components)
-               result[res_idx++] = nir_channel(b, unpacked, i);
-         }
+         loads[num_loads++] = dup_mem_intrinsic(b, intrin, NULL, load_offset,
+                                                load_comps, load_bit_size,
+                                                align);
 
          load_offset += load_comps * (load_bit_size / 8);
       }
+      assert(num_loads <= ARRAY_SIZE(loads));
+      result = nir_extract_bits(b, loads, num_loads, 0,
+                                num_components, bit_size);
    }
 
-   nir_ssa_def *vec_result = nir_vec(b, result, num_components);
    nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                            nir_src_for_ssa(vec_result));
+                            nir_src_for_ssa(result));
    nir_instr_remove(&intrin->instr);
 
    return true;
@@ -219,19 +205,11 @@ lower_mem_store_bit_size(nir_builder *b, nir_intrinsic_instr *intrin)
          if (store_bit_size == 24)
             store_bit_size = 16;
       }
-
       const unsigned store_bytes = store_comps * (store_bit_size / 8);
       assert(store_bytes % byte_size == 0);
-      const unsigned store_first_src_comp = start / byte_size;
-      const unsigned store_src_comps = store_bytes / byte_size;
-      assert(store_first_src_comp + store_src_comps <= num_components);
-
-      unsigned src_swiz[4] = { 0, };
-      for (unsigned i = 0; i < store_src_comps; i++)
-         src_swiz[i] = store_first_src_comp + i;
-      nir_ssa_def *store_value =
-         nir_swizzle(b, value, src_swiz, store_src_comps);
-      nir_ssa_def *packed = nir_bitcast_vector(b, store_value, store_bit_size);
+
+      nir_ssa_def *packed = nir_extract_bits(b, &value, 1, start * 8,
+                                             store_comps, store_bit_size);
 
       dup_mem_intrinsic(b, intrin, packed, start,
                         store_comps, store_bit_size, store_align);